diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 9c6c1c93b976aaf350cc1f9b3bdc538308fdf08b..936202b73696c8529b32c05b2356c7316fabc542 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1638,6 +1638,7 @@ OBJS = \ tree-vect-loop.o \ tree-vect-loop-manip.o \ tree-vect-slp.o \ + tree-vect-slp-patterns.o \ tree-vectorizer.o \ tree-vector-builder.o \ tree-vrp.o \ diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index a5ae4143a8c1293e674b499120372ee5fe5c412b..c86df5cd843084a5b7933ef99a23386891a7b0c1 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -709,7 +709,8 @@ loop. The pass is implemented in @file{tree-vectorizer.c} (the main driver), @file{tree-vect-loop.c} and @file{tree-vect-loop-manip.c} (loop specific parts and general loop utilities), @file{tree-vect-slp} (loop-aware SLP -functionality), @file{tree-vect-stmts.c} and @file{tree-vect-data-refs.c}. +functionality), @file{tree-vect-stmts.c}, @file{tree-vect-data-refs.c} and +@file{tree-vect-slp-patterns.c} containing the SLP pattern matcher. Analysis of data references is in @file{tree-data-ref.c}. SLP Vectorization. This pass performs vectorization of straight-line code. The diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c new file mode 100644 index 0000000000000000000000000000000000000000..f605f68d2a14c4bf4941f97b7c1d57f6acb5ffb1 --- /dev/null +++ b/gcc/tree-vect-slp-patterns.c @@ -0,0 +1,310 @@ +/* SLP - Pattern matcher on SLP trees + Copyright (C) 2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "optabs-tree.h"
+#include "insn-config.h"
+#include "recog.h" /* FIXME: for insn_data */
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimple-iterator.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+#include "langhooks.h"
+#include "gimple-walk.h"
+#include "dbgcnt.h"
+#include "tree-vector-builder.h"
+#include "vec-perm-indices.h"
+#include "gimple-fold.h"
+#include "internal-fn.h"
+
+/* SLP Pattern matching mechanism.
+
+  This extension to the SLP vectorizer allows one to transform the generated SLP
+  tree based on any pattern.  The difference between this and the normal vect
+  pattern matcher is that, unlike the latter, this matcher allows you to match
+  with instructions that do not belong to the same SSA dominator graph.
+
+  The only requirement that this pattern matcher has is that you are only
+  allowed to either match an entire group or none.
+
+  As an example, the following simple loop:
+
+    double a[restrict N]; double b[restrict N]; double c[restrict N];
+
+    for (int i=0; i < N; i+=2)
+    {
+      c[i] = a[i] - b[i+1];
+      c[i+1] = a[i+1] + b[i];
+    }
+
+  which represents a complex addition with a rotation of 90 degrees around the
+  Argand plane, i.e. if `a` and `b` were complex numbers then this would be the
+  same as `a + (b * I)`.
+
+  Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
+  both recognized in order for the pattern to work.
As an SLP tree this is
+  represented as
+
+              +--------------------------------+
+              |       stmt 0 *_9 = _10;        |
+              |       stmt 1 *_15 = _16;       |
+              +--------------------------------+
+                              |
+                              |
+                              v
+              +--------------------------------+
+              |     stmt 0 _10 = _4 - _8;      |
+              |    stmt 1 _16 = _12 + _14;     |
+              | lane permutation { 0[0] 1[1] } |
+              +--------------------------------+
+                          |        |
+                          |        |
+                          |        |
+             +-----+      |        |      +-----+
+             |     |      |        |      |     |
+       +-----| { } |<-----+        +----->| { } --------+
+       |     |     |   +------------------|     |       |
+       |     +-----+   |                  +-----+       |
+       |        |      |                                |
+       |        |      |                                |
+       |        +------|------------------+             |
+       |               |                  |             |
+       v               v                  v             v
+     +--------------------------+    +--------------------------------+
+     |     stmt 0 _8 = *_7;     |    |        stmt 0 _4 = *_3;        |
+     |    stmt 1 _14 = *_13;    |    |       stmt 1 _12 = *_11;       |
+     | load permutation { 1 0 } |    |    load permutation { 0 1 }    |
+     +--------------------------+    +--------------------------------+
+
+  The pattern matcher allows you to replace both statements 0 and 1 or none at
+  all.  You are also allowed to replace and match on any number of nodes.
+
+  The pattern matcher uses a sliding window to handle unrolled cases.  Every
+  pattern has to declare the number of statements that it consumes.  The
+  pattern matcher uses this to incrementally ask if the pattern can be applied.
+  This is done using the method `matches ()`.
+
+  If the pattern can be applied a VectPatternMatch is returned which contains all
+  state information on where the match was found.  This is stored in a list of
+  operations to perform.  If the match cannot be applied then the current
+  pattern is aborted and no changes made to the tree.
+
+  The pattern matcher has two modes:
+
+  1) pre-order traversal is used to perform a check to see if the pattern can be
+     applied or not.  If the pattern can be applied then a second step is
+     performed that allows the pattern to rewrite its children.  This step is
+     required because the application of a pattern can change the layout of the
+     tree which affects the nodes that are still to be matched.
This is + performed using `validate_p ()`. + + 2) post-order traversal is used to actually perform the rewriting of the + matches found earlier. This is done by calling `build ()` on all matches + that were found earlier. + + The pattern matcher currently only allows you to perform replacements to + internal functions. + + To add a new pattern, implement the VectPattern class and add the type to + slp_patterns. */ + +/* VectSimplePatternMatch holds contextual information about a single match + found in the SLP tree. The use of the class is to allow you to defer + performing any modifications to the SLP tree until they are to be done. By + calling build () the modifications are done in-place as to allow also re- + writing of the root node. */ + +class VectSimplePatternMatch : public VectPatternMatch +{ + protected: + uint8_t m_arity; + vec m_ifn_args; + internal_fn m_ifn; + vec_info *m_vinfo; + int m_idx, m_num_args; + tree m_type, m_vectype; + slp_tree m_node; + int m_pos; + + public: + VectSimplePatternMatch (uint8_t arity, vec ifn_args, + internal_fn ifn, vec_info *vinfo, int idx, + slp_tree node, tree type, tree vectype, + int num_args) + { + /* Number of statements the pattern matches against. */ + this->m_arity = arity; + + /* Arguments to be used when building the new stmts using the IFN. */ + this->m_ifn_args = ifn_args.copy (); + + /* The IFN to create the new statements with. */ + this->m_ifn = ifn; + + /* The vectorization information for the current loop. */ + this->m_vinfo = vinfo; + + /* The index in the sliding window where the statements were matched. */ + this->m_idx = idx; + + /* The number of arguments required to create the new IFN. */ + this->m_num_args = num_args; + + /* The original scalar type of the statement being replaced. */ + this->m_type = type; + + /* The vector type to create the IFN for. */ + this->m_vectype = vectype; + + /* The node that contains the statement that is being replaced. 
*/ + this->m_node = node; + + /* The current position inside the arity of the statement being replaced. + generally the match can be cached and re-used for multiple stmts. */ + this->m_pos = 0; + + gcc_assert ((unsigned)(num_args * arity) == ifn_args.length ()); + } + + uint8_t get_arity () + { + return this->m_arity; + } + + internal_fn get_IFN () + { + return this->m_ifn; + } + + const vec get_IFN_args () + { + return this->m_ifn_args; + } + + /* Create a replacement pattern statement for STMT_INFO and inserts the new + statement into NODE. The statement is created as call to internal + function IFN with arguments ARGS. The arity of IFN needs to match the + amount of elements in ARGS. The scalar type of the statement as TYPE and + the corresponding vector type VECTYPE. These two types are used to + construct the new vector only replacement pattern statement. + + Futhermore the new pattern is also added to the vectorization information + structure VINFO and the old statement STMT_INFO is marked as unused while + the new statement is marked as used and the number of SLP uses of the new + statement is incremented. + + The newly created SLP nodes are marked as SLP only and will be dissolved + if SLP is aborted. + + The newly created gimple call is returned and the BB remains unchanged. + */ + + gcall *build () + { + stmt_vec_info stmt_info; + + /* Check if this call was made too often. */ + if (this->m_pos >= this->m_arity) + return NULL; + + auto_vec args; + args.create (this->m_num_args); + + /* Create the argument set for use by gimple_build_call_internal_vec. */ + stmt_vec_info arg; + for (int i = 0; i < this->m_num_args; i++) + { + arg = this->m_ifn_args[i + (this->m_pos * this->m_num_args)]; + args.quick_push (gimple_get_lhs (STMT_VINFO_STMT (arg))); + } + + /* Check to see if we haven't created all the nodes already. */ + if (args.is_empty ()) + return NULL; + + /* Calculate the location of the statement in NODE to replace. 
*/ + int entry = this->m_idx - (this->m_arity - 1) + this->m_pos; + stmt_info = SLP_TREE_SCALAR_STMTS (this->m_node)[entry]; + + /* Create the new pattern statements. */ + gcall *call_stmt = gimple_build_call_internal_vec (this->m_ifn, args); + tree var = make_temp_ssa_name (this->m_type, call_stmt, "slp_patt"); + gimple* old_stmt = STMT_VINFO_STMT (stmt_info); + gimple_call_set_lhs (call_stmt, var); + gimple_set_location (call_stmt, gimple_location (old_stmt)); + gimple_call_set_nothrow (call_stmt, true); + + /* Adjust the book-keeping for the new and old statements for use during SLP. + This is required to get the right VF and statement during SLP analysis. + These changes are created after relevancy has been set for the nodes as + such we need to manually update them. Any changes will be undone if SLP + is cancelled. */ + stmt_vec_info call_stmt_info = this->m_vinfo->add_stmt (call_stmt); + vect_mark_pattern_stmts (this->m_vinfo, stmt_info, call_stmt, + this->m_vectype); + + /* We have to explicitly mark the old statement as unused because during + statement analysis the original and new pattern statement may require + different level of unrolling. As an example add/sub when vectorized + without a pattern requires 4 copies, whereas with a COMPLEX_ADD pattern + this only requires 2 copies and the two statement will be treated as + hand unrolled. That means that the analysis won't happen as it'll find + a mismatch. So we don't analyze the old statement and if we end up + needing it, e.g. SLP fails then we have to quickly re-analyze it. */ + STMT_VINFO_RELEVANT (stmt_info) = vect_unused_in_scope; + STMT_VINFO_SLP_VECT_ONLY (call_stmt_info) = true; + STMT_VINFO_RELATED_STMT (call_stmt_info) = stmt_info; + + /* Since we are replacing all the statements in the group with the same + thing it doesn't really matter. So just set it every time a new stmt + is created. 
*/ + SLP_TREE_SCALAR_STMTS (this->m_node)[entry] = call_stmt_info; + SLP_TREE_REPRESENTATIVE (this->m_node) = call_stmt_info; + SLP_TREE_CODE (this->m_node) = gimple_expr_code (call_stmt);; + + this->m_pos++; + return call_stmt; + } + + ~VectSimplePatternMatch () + { + this->m_ifn_args.release (); + } +}; + +#define SLP_PATTERN(x) &x::create +VectPatternDecl slp_patterns[] +{ + /* For least amount of back-tracking and more efficient matching + order patterns from the largest to the smallest. Especially if they + overlap in what they can detect. */ +}; +#undef SLP_PATTERN + +size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(VectPatternDecl); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 01189d44d892fc42b132bbb7de1c471df45518ae..947b031a6d492e6a02621dbcf41ba60d96c606f0 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2055,6 +2055,192 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size) return exact_div (common_multiple (nunits, group_size), group_size); } +/* Helper function of vect_match_slp_patterns. + + Attempts to match the given pattern PATT_INFO against the slp tree rooted in + NODE using VINFO and GROUP_SIZE. + + If matching is successful the value in NODE is updated and returned, if not + then it is returned unchanged. */ + +static bool +vect_match_slp_patterns_2 (slp_tree node, vec_info *vinfo, + unsigned int group_size, VectPatternDecl patt_fn, + poly_uint64 *max_nunits, bool *matches, + unsigned *npermutes, unsigned *tree_size, + scalar_stmts_to_slp_tree_map_t * bst_map) +{ + unsigned i; + stmt_vec_info stmt_info; + if (!node) + return false; + + vec scalar_stmts = SLP_TREE_SCALAR_STMTS (node); + bool found_p = false, found_rec_p = false; + VectPattern *pattern = patt_fn (node, vinfo); + uint8_t n = pattern->get_arity (); + + if (group_size % n != 0) + { + delete pattern; + return false; + } + + /* The data dependency orderings will force the nodes to be created in the + order of their data flow. 
Which means since we're matching specific + patterns in particular order we only have to do a linear scan here to match + the same instruction multiple times. The group size doesn't have to be + constrained. */ + + for (unsigned i = n - 1; i < scalar_stmts.length (); i += n) + { + stmt_info = scalar_stmts[i]; + + if (gimple_assign_load_p (STMT_VINFO_STMT (stmt_info)) + || gimple_store_p (STMT_VINFO_STMT (stmt_info)) + || gimple_assign_cast_p (STMT_VINFO_STMT (stmt_info))) + break; + + stmt_vec_info *stmt_infos = scalar_stmts.begin () + (i - (n - 1)); + + gcc_assert (stmt_infos); + + if (!pattern->matches (stmt_infos, i)) + { + /* We can only do replacements for entire groups, we must replace all + statements in a node as the argument list/children may not have + equal height then. Operations that don't rewrite the arguments + may be safe to do, so perhaps paramatrise it. */ + + found_p = false; + break; + } + + tree type = gimple_expr_type (STMT_VINFO_STMT (stmt_info)); + tree vectype = get_vectype_for_scalar_type (vinfo, type, node); + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Found %s pattern in SLP tree\n", + pattern->get_name ()); + + if (pattern->is_optab_supported_p (vectype, OPTIMIZE_FOR_SPEED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Target supports %s vectorization with mode %T\n", + internal_fn_name (pattern->get_last_ifn ()), + vectype); + + found_p = true; + } + else + { + if (dump_enabled_p ()) + { + if (!vectype) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Target does not support vector type for " + "%T\n", type); + else + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Target does not support %s for " + "vector type %T\n", + internal_fn_name (pattern->get_last_ifn ()), + vectype); + } + found_p = false; + } + } + + if (found_p) + { + /* Find which nodes should be the children of the new node. 
*/ + + if (!pattern->validate_p (max_nunits, matches, + npermutes, tree_size, bst_map)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "transformation for %s not valid due to post " + "condition\n", internal_fn_name (pattern->get_last_ifn ())); + found_p = false; + } + } + + /* Perform recursive matching, it's important to do this after matching things + in the current node as the matches here may re-order the nodes below it. + As such the pattern that needs to be subsequently match may change. */ + + if (SLP_TREE_CHILDREN (node).exists ()) { + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + found_rec_p |= vect_match_slp_patterns_2 (child, vinfo, group_size, + patt_fn, max_nunits, matches, + npermutes, tree_size, bst_map); + } + + if (found_p) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "Creating vec patterns\n"); + + while (gcall* call_stmt = pattern->build ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "\t %p stmt: %G", + node, call_stmt); + } + + vect_mark_slp_stmts_relevant (node); + } + + delete pattern; + return found_p | found_rec_p; +} + +/* Applies pattern matching to the given SLP tree rooted in NODE using vec_info + VINFO and group size GROUP_SIZE. + + The modified tree is returned. Patterns are tried in order and multiple + patterns may match. If the permutes need to be cancelled then + CANCEL_PERMUTE is set. 
*/
+
+static bool
+vect_match_slp_patterns (slp_tree node, vec_info *vinfo,
+                         unsigned int group_size, poly_uint64 *max_nunits,
+                         bool *matches, unsigned *npermutes,
+                         unsigned *tree_size,
+                         scalar_stmts_to_slp_tree_map_t * bst_map)
+{
+  DUMP_VECT_SCOPE ("vect_match_slp_patterns");
+  bool found_p = false;
+
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "-- before patt match --\n");
+      vect_print_slp_graph (MSG_NOTE, vect_location, node);
+      dump_printf_loc (MSG_NOTE, vect_location, "-- end patt --\n");
+    }
+
+  for (unsigned x = 0; x < num__slp_patterns; x++)
+    found_p |= vect_match_slp_patterns_2 (node, vinfo, group_size,
+                                          slp_patterns[x], max_nunits, matches,
+                                          npermutes, tree_size, bst_map);
+
+  /* TODO: Remove in final version.  This second dump is only here for
+     generating debug dot graphs from the SLP tree after matching.  */
+
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "-- start dot --\n");
+      vect_print_slp_graph (MSG_NOTE, vect_location, node);
+      dump_printf_loc (MSG_NOTE, vect_location, "-- end dot --\n");
+    }
+
+  return found_p;
+}
+
 /* Analyze an SLP instance starting from a group of grouped stores.
    Call vect_build_slp_tree to build a tree of packed stmts if possible.
    Return FALSE if it's impossible to SLP any stmt in the loop.  */
@@ -2192,6 +2378,17 @@ vect_analyze_slp_instance (vec_info *vinfo,
                               &tree_size, bst_map);
   if (node != NULL)
     {
+      /* Building patterns adds new stmts; temporarily allow add_stmt again.  */
+      vinfo->stmt_vec_info_ro = false;
+
+      /* See if any patterns can be found in the constructed SLP tree
+         before we do any analysis on it.  */
+      vect_match_slp_patterns (node, vinfo, group_size, &max_nunits,
+                               matches, &npermutes, &tree_size, bst_map);
+
+      /* After this no more add_stmt calls are allowed.  */
+      vinfo->stmt_vec_info_ro = true;
+
       /* Calculate the unrolling factor based on the smallest type.
*/ poly_uint64 unrolling_factor = calculate_unrolling_factor (max_nunits, group_size); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 79926f1a43534635ddca85556a928e364022c40a..95bbf13b1c733c07b7deb8515c1b17c6979cff21 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -26,6 +26,7 @@ typedef class _stmt_vec_info *stmt_vec_info; #include "tree-data-ref.h" #include "tree-hash-traits.h" #include "target.h" +#include "internal-fn.h" /* Used for naming of new temporaries. */ @@ -2100,6 +2101,99 @@ typedef hash_map , slp_tree, simple_hashmap_traits > scalar_stmts_to_slp_tree_map_t; +/* SLP Pattern matcher types, tree-vect-slp-patterns.c. */ + +class VectPatternMatch +{ + public: + virtual gcall *build () = 0; + virtual internal_fn get_IFN () = 0; + virtual const vec get_IFN_args () = 0; + virtual uint8_t get_arity () = 0; + virtual ~VectPatternMatch () {}; +}; + +class VectPattern +{ + protected: + uint8_t m_arity; + uint8_t m_num_args; + internal_fn m_last_ifn; + int m_last_idx; + slp_tree m_node; + vec_info *m_vinfo; + vec m_matches; + VectPattern (slp_tree node, vec_info *vinfo) + { + this->m_last_ifn = IFN_LAST; + this->m_node = node; + this->m_vinfo = vinfo; + this->m_matches.create (0); + this->m_curr_match = 0; + } + + private: + unsigned m_curr_match; + + public: + static VectPattern* create (slp_tree node, vec_info *vinfo); + virtual bool matches (stmt_vec_info *stmts, int idx) = 0; + + virtual const char* get_name () = 0; + virtual ~VectPattern () + { + int i; + VectPatternMatch *match; + FOR_EACH_VEC_ELT (this->m_matches, i, match) + delete match; + this->m_matches.release (); + } + + virtual gcall *build () + { + if (this->m_curr_match >= this->m_matches.length ()) + return NULL; + + gcall *entry = + this->m_matches[this->m_curr_match]->build (); + + if (entry) + return entry; + + this->m_curr_match++; + return build (); + } + + virtual bool validate_p (poly_uint64 *, bool *, unsigned *, unsigned *, + 
scalar_stmts_to_slp_tree_map_t *) + { + return true; + } + + virtual uint8_t get_arity () + { + return this->m_arity; + } + + virtual bool is_optab_supported_p ( tree vectype, optimization_type opt_type) + { + if (!vectype) + return false; + + return direct_internal_fn_supported_p (this->m_last_ifn, vectype, + opt_type); + } + + internal_fn get_last_ifn () + { + return this->m_last_ifn; + } +}; + +typedef VectPattern* (*VectPatternDecl) (slp_tree, vec_info *); +extern VectPatternDecl slp_patterns[]; +extern size_t num__slp_patterns; + extern void vect_free_slp_tree (slp_tree node);