From: Robin Dapp <rdapp@linux.vnet.ibm.com>
To: Richard Biener <richard.guenther@gmail.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>,
"Bin.Cheng" <amker.cheng@gmail.com>
Subject: [PATCH 4/4] Vect peeling cost model
Date: Mon, 08 May 2017 16:27:00 -0000 [thread overview]
Message-ID: <8a345817-a775-ffad-958c-b2511bfe46e7@linux.vnet.ibm.com> (raw)
In-Reply-To: <CAFiYyc0TuwUY1oqovVQF93GFPBg=qA7q4+jRFvPuhF2ayVWbHQ@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 274 bytes --]
gcc/ChangeLog:
2017-05-08 Robin Dapp <rdapp@linux.vnet.ibm.com>
* tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost):
Remove unused variable.
(vect_enhance_data_refs_alignment):
Compare the best peeling's costs to those of doing no peeling and
choose no peeling if they are equal.
[-- Attachment #2: gcc-peeling-p4.diff --]
[-- Type: text/x-patch, Size: 8739 bytes --]
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 786f826..67d2f57 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1293,7 +1293,7 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
{
vect_peel_info elem = *slot;
int dummy;
- unsigned int inside_cost = 0, outside_cost = 0, i;
+ unsigned int inside_cost = 0, outside_cost = 0;
gimple *stmt = DR_STMT (elem->dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -1520,7 +1520,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
enum dr_alignment_support supportable_dr_alignment;
struct data_reference *dr0 = NULL, *first_store = NULL;
struct data_reference *dr;
- struct data_reference *dr0_known_align = NULL;
unsigned int i, j;
bool do_peeling = false;
bool do_versioning = false;
@@ -1720,6 +1719,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|| loop->inner)
do_peeling = false;
+ struct _vect_peel_extended_info peel_for_known_alignment;
+ struct _vect_peel_extended_info peel_for_unknown_alignment;
+ struct _vect_peel_extended_info best_peel;
unsigned int unknown_align_inside_cost = UINT_MAX;
unsigned int unknown_align_outside_cost = UINT_MAX;
unsigned int unknown_align_count = 0;
@@ -1731,74 +1733,72 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* Check if the target requires to prefer stores over loads, i.e., if
misaligned stores are more expensive than misaligned loads (taking
drs with same alignment into account). */
- if (first_store && DR_IS_READ (dr0))
- {
- unsigned int load_inside_cost = 0;
- unsigned int load_outside_cost = 0;
- unsigned int store_inside_cost = 0;
- unsigned int store_outside_cost = 0;
- stmt_vector_for_cost dummy;
- dummy.create (2);
- vect_get_peeling_costs_all_drs (dr0,
- &load_inside_cost,
- &load_outside_cost,
- &dummy, vf / 2, vf);
- dummy.release ();
-
+ unsigned int load_inside_cost = 0;
+ unsigned int load_outside_cost = 0;
+ unsigned int store_inside_cost = 0;
+ unsigned int store_outside_cost = 0;
+
+ stmt_vector_for_cost dummy;
+ dummy.create (2);
+ vect_get_peeling_costs_all_drs (dr0,
+ &load_inside_cost,
+ &load_outside_cost,
+ &dummy, vf / 2, vf);
+ dummy.release ();
+
+ if (first_store)
+ {
dummy.create (2);
vect_get_peeling_costs_all_drs (first_store,
&store_inside_cost,
&store_outside_cost,
&dummy, vf / 2, vf);
dummy.release ();
+ }
+ else
+ {
+ store_inside_cost = UINT_MAX;
+ store_outside_cost = UINT_MAX;
+ }
- if (load_inside_cost > store_inside_cost
- || (load_inside_cost == store_inside_cost
- && load_outside_cost > store_outside_cost))
- {
- dr0 = first_store;
- unknown_align_inside_cost = store_inside_cost;
- unknown_align_outside_cost = store_outside_cost;
- }
- else
- {
- unknown_align_inside_cost = load_inside_cost;
- unknown_align_outside_cost = load_outside_cost;
- }
-
- stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
- prologue_cost_vec.create (2);
- epilogue_cost_vec.create (2);
+ if (load_inside_cost > store_inside_cost
+ || (load_inside_cost == store_inside_cost
+ && load_outside_cost > store_outside_cost))
+ {
+ dr0 = first_store;
+ unknown_align_inside_cost = store_inside_cost;
+ unknown_align_outside_cost = store_outside_cost;
+ }
+ else
+ {
+ unknown_align_inside_cost = load_inside_cost;
+ unknown_align_outside_cost = load_outside_cost;
+ }
- int dummy2;
- unknown_align_outside_cost += vect_get_known_peeling_cost
- (loop_vinfo, vf / 2, &dummy2,
- &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
- &prologue_cost_vec, &epilogue_cost_vec);
+ stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
+ prologue_cost_vec.create (2);
+ epilogue_cost_vec.create (2);
- prologue_cost_vec.release ();
- epilogue_cost_vec.release ();
+ int dummy2;
+ unknown_align_outside_cost += vect_get_known_peeling_cost
+ (loop_vinfo, vf / 2, &dummy2,
+ &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+ &prologue_cost_vec, &epilogue_cost_vec);
- unknown_align_count = 1 + STMT_VINFO_SAME_ALIGN_REFS
- (vinfo_for_stmt (DR_STMT (dr0))).length ();
- }
+ prologue_cost_vec.release ();
+ epilogue_cost_vec.release ();
- /* In case there are only loads with different unknown misalignments, use
- peeling only if it may help to align other accesses in the loop or
- if it may help improving load bandwith when we'd end up using
- unaligned loads. */
- tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
- if (!first_store
- && !STMT_VINFO_SAME_ALIGN_REFS (
- vinfo_for_stmt (DR_STMT (dr0))).length ()
- && (vect_supportable_dr_alignment (dr0, false)
- != dr_unaligned_supported
- || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
- == builtin_vectorization_cost (unaligned_load, dr0_vt, -1))))
- do_peeling = false;
+ unknown_align_count = 1 + STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (dr0))).length ();
}
- struct _vect_peel_extended_info peel_for_known_alignment;
+ peel_for_unknown_alignment.peel_info.count = unknown_align_count;
+ peel_for_unknown_alignment.inside_cost = unknown_align_inside_cost;
+ peel_for_unknown_alignment.outside_cost = unknown_align_outside_cost;
+ peel_for_unknown_alignment.peel_info.npeel = 0;
+ peel_for_unknown_alignment.peel_info.dr = dr0;
+ best_peel = peel_for_unknown_alignment;
+
peel_for_known_alignment.inside_cost = UINT_MAX;
peel_for_known_alignment.outside_cost = UINT_MAX;
peel_for_known_alignment.peel_info.count = 0;
@@ -1811,15 +1811,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* Choose the best peeling from the hash table. */
peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
(&peeling_htab, loop_vinfo, &npeel, &body_cost_vec);
- dr0_known_align = peel_for_known_alignment.peel_info.dr;
}
/* Compare costs of peeling for known and unknown alignment. */
- if (unknown_align_inside_cost > peel_for_known_alignment.inside_cost
+ if (peel_for_unknown_alignment.inside_cost > peel_for_known_alignment.inside_cost
|| (unknown_align_inside_cost == peel_for_known_alignment.inside_cost
&& unknown_align_outside_cost > peel_for_known_alignment.outside_cost))
{
- dr0 = dr0_known_align;
+ best_peel = peel_for_known_alignment;
}
/* We might still want to try to align the datarefs with unknown
@@ -1827,13 +1826,53 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
less datarefs. */
if (peel_for_known_alignment.peel_info.count * 2 > unknown_align_count)
{
- dr0 = dr0_known_align;
+ best_peel = peel_for_known_alignment;
}
- if (dr0 == dr0_known_align && !npeel)
- do_peeling = false;
- if (dr0 == NULL)
- do_peeling = false;
+ /* Calculate the penalty for no peeling, i.e. leaving everything
+ unaligned.
+ TODO: use something like an adapted vect_get_peeling_costs_all_drs. */
+ unsigned nopeel_inside_cost = 0;
+ unsigned nopeel_outside_cost = 0;
+
+ stmt_vector_for_cost dummy;
+ dummy.create (2);
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
+ {
+ vect_get_data_access_cost (dr, &nopeel_inside_cost,
+ &nopeel_outside_cost, &dummy);
+ }
+ dummy.release ();
+
+ /* Add epilogue costs. As we do no peeling for alignment here, no prologue
+ costs will be recorded. */
+ stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
+ prologue_cost_vec.create (2);
+ epilogue_cost_vec.create (2);
+
+ int dummy2;
+ nopeel_outside_cost += vect_get_known_peeling_cost
+ (loop_vinfo, vf / 2, &dummy2,
+ &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+ &prologue_cost_vec, &epilogue_cost_vec);
+
+ prologue_cost_vec.release ();
+ epilogue_cost_vec.release ();
+
+ npeel = best_peel.peel_info.npeel;
+ dr0 = best_peel.peel_info.dr;
+
+ /* Check if doing no peeling is not more expensive than the best peeling we
+ have so far. */
+ if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
+ && vect_supportable_dr_alignment (dr0, false)
+ && ((nopeel_inside_cost < best_peel.inside_cost)
+ || (nopeel_inside_cost == best_peel.inside_cost
+ && nopeel_outside_cost <= best_peel.outside_cost)))
+ {
+ do_peeling = false;
+ npeel = 0;
+ }
if (do_peeling)
{
next prev parent reply other threads:[~2017-05-08 16:13 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-04-11 14:38 [RFC] S/390: Alignment peeling prolog generation Robin Dapp
2017-04-11 14:57 ` Bin.Cheng
2017-04-11 15:03 ` Robin Dapp
2017-04-11 15:07 ` Bin.Cheng
2017-04-11 16:25 ` Richard Biener
2017-04-12 7:51 ` Robin Dapp
2017-04-12 7:58 ` Richard Biener
2017-05-04 9:04 ` Robin Dapp
2017-05-05 11:04 ` Richard Biener
2017-05-08 16:12 ` Robin Dapp
2017-05-09 10:38 ` Richard Biener
2017-05-11 11:17 ` [PATCH 1/5] Vect peeling cost model Robin Dapp
2017-05-11 11:17 ` [RFC] S/390: Alignment peeling prolog generation Robin Dapp
2017-05-11 12:15 ` Richard Biener
2017-05-11 12:16 ` Richard Biener
2017-05-11 12:48 ` Richard Biener
2017-05-11 11:18 ` [PATCH 2/5] Vect peeling cost model Robin Dapp
2017-05-11 11:19 ` [PATCH 3/5] " Robin Dapp
2017-05-11 11:20 ` [PATCH 4/5] " Robin Dapp
2017-05-11 15:30 ` [PATCH 4/5 v2] " Robin Dapp
2017-05-12 9:36 ` Richard Biener
2017-05-23 15:58 ` [PATCH 2/5 v3] " Robin Dapp
2017-05-23 19:25 ` Richard Sandiford
2017-05-24 7:37 ` Robin Dapp
2017-05-24 7:53 ` Richard Sandiford
2017-05-23 15:58 ` [PATCH 1/5 " Robin Dapp
2017-05-23 15:58 ` [PATCH 0/5 " Robin Dapp
2017-05-24 7:51 ` Richard Biener
2017-05-24 11:57 ` Robin Dapp
2017-05-24 13:56 ` Richard Biener
2017-06-03 17:12 ` Andreas Schwab
2017-06-06 7:13 ` Robin Dapp
2017-06-06 17:26 ` Andreas Schwab
2017-06-07 10:50 ` Robin Dapp
2017-06-07 11:43 ` Andreas Schwab
2017-05-23 15:59 ` [PATCH 4/5 " Robin Dapp
2017-05-31 13:56 ` Christophe Lyon
2017-05-31 14:37 ` Robin Dapp
2017-05-31 14:49 ` Christophe Lyon
2017-05-23 15:59 ` [PATCH 5/5 " Robin Dapp
2017-05-23 16:02 ` [PATCH 3/5 " Robin Dapp
2017-05-11 11:59 ` [PATCH 5/5] " Robin Dapp
2017-05-08 16:13 ` [PATCH 3/4] " Robin Dapp
2017-05-09 10:41 ` Richard Biener
2017-05-08 16:27 ` Robin Dapp [this message]
2017-05-09 10:55 ` [PATCH 4/4] " Richard Biener
2017-05-04 9:04 ` [PATCH 1/3] " Robin Dapp
2017-05-05 10:32 ` Richard Biener
2017-05-04 9:07 ` [PATCH 2/3] " Robin Dapp
2017-05-05 10:37 ` Richard Biener
2017-05-04 9:14 ` [PATCH 3/3] " Robin Dapp
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=8a345817-a775-ffad-958c-b2511bfe46e7@linux.vnet.ibm.com \
--to=rdapp@linux.vnet.ibm.com \
--cc=amker.cheng@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=richard.guenther@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).