diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 978a08f7b04bea7e7c41f841dc24b773c1adcfe2..885f15c0d3866a1db749d799caffddfe8a390e0b 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1645,6 +1645,7 @@ OBJS = \ tree-vect-loop.o \ tree-vect-loop-manip.o \ tree-vect-slp.o \ + tree-vect-slp-patterns.o \ tree-vectorizer.o \ tree-vector-builder.o \ tree-vrp.o \ diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index a5ae4143a8c1293e674b499120372ee5fe5c412b..c86df5cd843084a5b7933ef99a23386891a7b0c1 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -709,7 +709,8 @@ loop. The pass is implemented in @file{tree-vectorizer.c} (the main driver), @file{tree-vect-loop.c} and @file{tree-vect-loop-manip.c} (loop specific parts and general loop utilities), @file{tree-vect-slp} (loop-aware SLP -functionality), @file{tree-vect-stmts.c} and @file{tree-vect-data-refs.c}. +functionality), @file{tree-vect-stmts.c}, @file{tree-vect-data-refs.c} and +@file{tree-vect-slp-patterns.c} containing the SLP pattern matcher. Analysis of data references is in @file{tree-data-ref.c}. SLP Vectorization. This pass performs vectorization of straight-line code. The diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c new file mode 100644 index 0000000000000000000000000000000000000000..b81cd6856ad4db5a0cda48ca214389f007e060da --- /dev/null +++ b/gcc/tree-vect-slp-patterns.c @@ -0,0 +1,293 @@ +/* SLP - Pattern matcher on SLP trees + Copyright (C) 2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "ssa.h" +#include "optabs-tree.h" +#include "insn-config.h" +#include "recog.h" /* FIXME: for insn_data */ +#include "fold-const.h" +#include "stor-layout.h" +#include "gimple-iterator.h" +#include "cfgloop.h" +#include "tree-vectorizer.h" +#include "langhooks.h" +#include "gimple-walk.h" +#include "dbgcnt.h" +#include "tree-vector-builder.h" +#include "vec-perm-indices.h" +#include "gimple-fold.h" +#include "internal-fn.h" + +/* SLP Pattern matching mechanism. + + This extension to the SLP vectorizer allows one to transform the generated SLP + tree based on any pattern. The difference between this and the normal vect + pattern matcher is that unlike the former, this matcher allows you to match + with instructions that do not belong to the same SSA dominator graph. + + The only requirement that this pattern matcher has is that you are only + only allowed to either match an entire group or none. + + As an example, the following simple loop: + + double a[restrict N]; double b[restrict N]; double c[restrict N]; + + for (int i=0; i < N; i+=2) + { + c[i] = a[i] - b[i+1]; + c[i+1] = a[i+1] + b[i]; + } + + which represents a complex addition on with a rotation of 90* around the + argand plane. i.e. if `a` and `b` were complex numbers then this would be the + same as `a + (b * I)`. + + Here the expressions for `c[i]` and `c[i+1]` are independent but have to be + both recognized in order for the pattern to work. As an SLP tree this is + represented as + + +--------------------------------+ + | stmt 0 *_9 = _10; | + | stmt 1 *_15 = _16; | + +--------------------------------+ + | + | + v + +--------------------------------+ + | stmt 0 _10 = _4 - _8; | + | stmt 1 _16 = _12 + _14; | + | lane permutation { 0[0] 1[1] } | + +--------------------------------+ + | | + | | + | | + +-----+ | | +-----+ + | | | | | | + +-----| { } |<-----+ +----->| { } --------+ + | | | +------------------| | | + | +-----+ | +-----+ | + | | | | + | | | | + | +------|------------------+ | + | | | | + v v v v + +--------------------------+ +--------------------------------+ + | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; | + | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; | + | load permutation { 1 0 } | | load permutation { 0 1 } | + +--------------------------+ +--------------------------------+ + + The pattern matcher allows you to replace both statements 0 and 1 or none at + all. You are also allowed to replace and match on any number of nodes. + + The pattern matcher matches on the representative statement for the SLP node. + In the case of two_operators it allows you to match the children of the node. + This is done using the method `matches ()`. + + If the pattern can be applied a VecPatternMatch is returned which contains all + state information on where the match was found. This is stored in a list of + operations to perform. If the match cannot be applied then the current + pattern is aborted and no changes made to the tree. + + The pattern matcher has two modes: + + 1) First in post-order traversal is used to perform a check to see if the + pattern can be applied or not. If the pattern can be applied then a second + step is performed that allows the pattern to rewrite it's children. This + step is required because the application of a pattern can change the layout + of the tree which affects the nodes that are still to be matched. This is + performed using `validate_p ()`. + + 2) Also in post-order traversal actually perform the rewriting of the + matches found earlier. This is done by calling `build ()` on all matches + that were found earlier. + + The pattern matcher currently only allows you to perform replacements to + internal functions. + + Once the patterns are matched it is one way, these cannot be undone. It is + currently not supported to match patterns recursively. + + To add a new pattern, implement the vect_pattern class and add the type to + slp_patterns. */ + +/******************************************************************************* + * Simple vector pattern matcher + ******************************************************************************/ + +/* vect_simple_pattern_match holds contextual information about a single match + found in the SLP tree. The use of the class is to allow you to defer + performing any modifications to the SLP tree until they are to be done. By + calling build () the modifications are done in-place as to allow also re- + writing of the root node. */ + +class vect_simple_pattern_match : public vect_pattern_match +{ + protected: + uint8_t m_arity; + internal_fn m_ifn; + vec_info *m_vinfo; + int m_num_args; + vec *m_nodes; + + public: + vect_simple_pattern_match (uint8_t, internal_fn, vec_info *, + vec *, int); + gcall *build (); + + uint8_t get_arity () + { + return this->m_arity; + } + + internal_fn get_IFN () + { + return this->m_ifn; + } +}; + +vect_simple_pattern_match::vect_simple_pattern_match (uint8_t arity, + internal_fn ifn, + vec_info *vinfo, + vec *nodes, + int num_args) +{ + /* Number of statements the pattern matches against. */ + this->m_arity = arity; + + /* The IFN to create the new statements with. */ + this->m_ifn = ifn; + + /* The vectorization information for the current loop. */ + this->m_vinfo = vinfo; + + /* The number of arguments required to create the new IFN. */ + this->m_num_args = num_args; + + /* The node that contains the statement that is being replaced. */ + this->m_nodes = nodes; +} + +/* Create a replacement pattern statement for STMT_INFO and inserts the new + statement into NODE. The statement is created as call to internal + function IFN with arguments ARGS. The arity of IFN needs to match the + amount of elements in ARGS. The scalar type of the statement as TYPE and + the corresponding vector type VECTYPE. These two types are used to + construct the new vector only replacement pattern statement. + + Futhermore the new pattern is also added to the vectorization information + structure VINFO and the old statement STMT_INFO is marked as unused while + the new statement is marked as used and the number of SLP uses of the new + statement is incremented. + + The newly created SLP nodes are marked as SLP only and will be dissolved + if SLP is aborted. + + The newly created gimple call is returned and the BB remains unchanged. +*/ + +gcall * +vect_simple_pattern_match::build () +{ + stmt_vec_info stmt_info; + + auto_vec args; + args.create (this->m_num_args); + args.quick_grow_cleared (this->m_num_args); + slp_tree node; + unsigned ix; + stmt_vec_info call_stmt_info; + gcall *call_stmt = NULL; + + FOR_EACH_VEC_ELT (*this->m_nodes, ix, node) + { + /* Calculate the location of the statement in NODE to replace. */ + stmt_info = SLP_TREE_REPRESENTATIVE (node); + gimple* old_stmt = STMT_VINFO_STMT (stmt_info); + tree type = gimple_expr_type (old_stmt); + + /* Create the argument set for use by gimple_build_call_internal_vec. */ + for (int i = 0; i < this->m_num_args; i++) + { + tree var = make_temp_ssa_name (type, old_stmt, "_pa"); + args[i] = var; + } + + /* Create the new pattern statements. */ + call_stmt = gimple_build_call_internal_vec (this->m_ifn, args); + tree var = make_temp_ssa_name (type, call_stmt, "slp_patt"); + gimple_call_set_lhs (call_stmt, var); + gimple_set_location (call_stmt, gimple_location (old_stmt)); + gimple_call_set_nothrow (call_stmt, true); + + /* Adjust the book-keeping for the new and old statements for use during + SLP. This is required to get the right VF and statement during SLP + analysis. These changes are created after relevancy has been set for + the nodes as such we need to manually update them. Any changes will be + undone if SLP is cancelled. */ + call_stmt_info + = this->m_vinfo->add_pattern_stmt (call_stmt, stmt_info); + STMT_SLP_TYPE (call_stmt_info) = pure_slp; + vect_mark_pattern_stmts (this->m_vinfo, stmt_info, call_stmt, + SLP_TREE_VECTYPE (node)); + + /* We have to explicitly mark the old statement as unused because during + statement analysis the original and new pattern statement may require + different level of unrolling. As an example add/sub when vectorized + without a pattern requires 4 copies, whereas with a COMPLEX_ADD pattern + this only requires 2 copies and the two statement will be treated as + hand unrolled. That means that the analysis won't happen as it'll find + a mismatch. So we don't analyze the old statement and if we end up + needing it, e.g. SLP fails then we have to quickly re-analyze it. */ + STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope; + STMT_VINFO_SLP_VECT_ONLY (call_stmt_info) = true; + + /* Since we are replacing all the statements in the group with the same + thing it doesn't really matter. So just set it every time a new stmt + is created. */ + SLP_TREE_REPRESENTATIVE (node) = call_stmt_info; + SLP_TREE_CODE (node) = CALL_EXPR; + } + return call_stmt; +} + +/******************************************************************************* + * Pattern matching definitions + ******************************************************************************/ + +#define SLP_PATTERN(x) &x::create +vect_pattern_decl_t slp_patterns[] +{ + /* For least amount of back-tracking and more efficient matching + order patterns from the largest to the smallest. Especially if they + overlap in what they can detect. */ + +}; +#undef SLP_PATTERN + +size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 30036ec84c74a0e428cc661eacf565224047f9e0..c03fc2fbecad1a2219504ac9daae75495e691775 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1929,6 +1929,13 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, dump_printf (dump_kind, " %u", j); dump_printf (dump_kind, " }\n"); } + if (SLP_TREE_REPRESENTATIVE (node) + && STMT_VINFO_SLP_VECT_ONLY (SLP_TREE_REPRESENTATIVE (node))) + { + dump_printf_loc (metadata, user_loc, "\tSLP Only pattern:\n"); + dump_printf_loc (dump_kind, user_loc, "\t %G", + STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node))); + } if (SLP_TREE_LANE_PERMUTATION (node).exists ()) { dump_printf_loc (metadata, user_loc, "\tlane permutation {"); @@ -2181,6 +2188,147 @@ enum slp_instance_kind { slp_inst_kind_ctor }; +/* Helper function of vect_match_slp_patterns. + + Attempts to match the given pattern PATT_INFO against the slp tree rooted in + NODE using VINFO. + + If matching is successful the value in NODE is updated and returned, if not + then it is returned unchanged. */ + +static bool +vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info *vinfo, + vect_pattern_decl_t patt_fn, hash_set *visited) +{ + unsigned i; + slp_tree node = *ref_node; + bool found_p = false, found_rec_p = false; + if (!node || visited->contains (node)) + return false; + + /* Perform recursive matching, it's important to do this after matching things + in the current node as the matches here may re-order the nodes below it. + As such the pattern that needs to be subsequently match may change. */ + + if (SLP_TREE_CHILDREN (node).exists ()) { + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + found_rec_p |= vect_match_slp_patterns_2 (&SLP_TREE_CHILDREN (node)[i], + vinfo, patt_fn, visited); + } + + vect_pattern *pattern = patt_fn (ref_node, vinfo); + + if (pattern->matches ()) + { + tree vectype = SLP_TREE_VECTYPE (node); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Found %s pattern in SLP tree\n", + pattern->get_name ()); + + if (pattern->is_optab_supported_p (vectype, OPTIMIZE_FOR_SPEED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Target supports %s vectorization with mode %T\n", + internal_fn_name (pattern->get_ifn ()), vectype); + + found_p = true; + } + else + { + if (dump_enabled_p ()) + { + if (!vectype) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Target does not support vector type for %T\n", + SLP_TREE_DEF_TYPE (node)); + else + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Target does not support %s for vector type " + "%T\n", internal_fn_name (pattern->get_ifn ()), + vectype); + } + found_p = false; + } + } + + if (found_p) + { + /* Find which nodes should be the children of the new node. */ + if (!pattern->validate_p ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "transformation for %s not valid due to post " + "condition\n", + internal_fn_name (pattern->get_ifn ())); + found_p = false; + } + } + + if (found_p) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "Creating vec patterns\n"); + + gcall* call_stmt = pattern->build (); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "\t %p stmt: %G", + node, call_stmt); + } + + delete pattern; + return found_p | found_rec_p; +} + +/* Applies pattern matching to the given SLP tree rooted in REF_NODE using + vec_info VINFO. + + The modified tree is returned. Patterns are tried in order and multiple + patterns may match. */ + +static bool +vect_match_slp_patterns (slp_tree *ref_node, vec_info *vinfo) +{ + DUMP_VECT_SCOPE ("vect_match_slp_patterns"); + bool found_p = false; + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "-- before patt match --\n"); + vect_print_slp_graph (MSG_NOTE, vect_location, *ref_node); + dump_printf_loc (MSG_NOTE, vect_location, "-- end patt --\n"); + } + + hash_set *visited = new hash_set (); + for (unsigned x = 0; x < num__slp_patterns; x++) + { + visited->empty (); + found_p |= vect_match_slp_patterns_2 (ref_node, vinfo, slp_patterns[x], + visited); + } + + delete visited; + + /* TODO: Remove in final version, only here for generating debug dot graphs + from SLP tree. */ + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "-- start dot --\n"); + vect_print_slp_graph (MSG_NOTE, vect_location, *ref_node); + dump_printf_loc (MSG_NOTE, vect_location, "-- end dot --\n"); + } + + return found_p; +} + +/* Analyze an SLP instance starting from a group of grouped stores. Call + vect_build_slp_tree to build a tree of packed stmts if possible. + Return FALSE if it's impossible to SLP any stmt in the loop. */ + static bool vect_analyze_slp_instance (vec_info *vinfo, scalar_stmts_to_slp_tree_map_t *bst_map, @@ -2661,6 +2809,10 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) max_tree_size); } + /* See if any patterns can be found in the SLP tree. */ + FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (vinfo), i, instance) + vect_match_slp_patterns (&SLP_INSTANCE_TREE (instance), vinfo); + /* The map keeps a reference on SLP nodes built, release that. */ for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin (); it != bst_map->end (); ++it) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4bd454cfb185d7036843fc7140b073f525b2ec6a..b813508d3ceaf4c54f612bc10f9aa42ffe0ce0dd 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -27,6 +27,7 @@ typedef class _stmt_vec_info *stmt_vec_info; #include "tree-hash-traits.h" #include "target.h" #include "alloc-pool.h" +#include "internal-fn.h" /* Used for naming of new temporaries. */ @@ -1995,4 +1996,86 @@ void vect_free_loop_info_assumptions (class loop *); gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL); bool vect_stmt_dominates_stmt_p (gimple *, gimple *); +/* SLP Pattern matcher types, tree-vect-slp-patterns.c. */ + +enum _complex_operation : unsigned; + +class vect_pattern_match +{ + public: + virtual gcall *build () = 0; + virtual internal_fn get_IFN () = 0; + virtual uint8_t get_arity () = 0; + virtual ~vect_pattern_match () {}; +}; + +class vect_pattern +{ + protected: + uint8_t m_arity; + uint8_t m_num_args; + internal_fn m_ifn; + slp_tree *m_node; + vec_info *m_vinfo; + vect_pattern_match* m_match; + vec m_ops; + vect_pattern (slp_tree *node, vec_info *vinfo) + { + this->m_ifn = IFN_LAST; + this->m_node = node; + this->m_vinfo = vinfo; + this->m_match = NULL; + this->m_ops.create (0); + } + + public: + static vect_pattern* create (slp_tree *node, vec_info *vinfo); + virtual bool matches () = 0; + virtual bool matches (enum _complex_operation, vec) + { + return false; + } + + virtual const char* get_name () = 0; + virtual ~vect_pattern () + { + this->m_ops.release (); + } + + virtual gcall *build () + { + if (this->m_match == NULL) + return NULL; + + return this->m_match->build (); + } + + virtual bool validate_p () + { + return true; + } + + virtual uint8_t get_arity () + { + return this->m_arity; + } + + virtual bool is_optab_supported_p (tree vectype, optimization_type opt_type) + { + if (!vectype) + return false; + + return direct_internal_fn_supported_p (this->m_ifn, vectype, opt_type); + } + + virtual internal_fn get_ifn () + { + return this->m_ifn; + } +}; + +typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree *, vec_info *); +extern vect_pattern_decl_t slp_patterns[]; +extern size_t num__slp_patterns; + #endif /* GCC_TREE_VECTORIZER_H */