diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 9c6c1c93b976aaf350cc1f9b3bdc538308fdf08b..936202b73696c8529b32c05b2356c7316fabc542 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1638,6 +1638,7 @@ OBJS = \
tree-vect-loop.o \
tree-vect-loop-manip.o \
tree-vect-slp.o \
+ tree-vect-slp-patterns.o \
tree-vectorizer.o \
tree-vector-builder.o \
tree-vrp.o \
diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi
index a5ae4143a8c1293e674b499120372ee5fe5c412b..c86df5cd843084a5b7933ef99a23386891a7b0c1 100644
--- a/gcc/doc/passes.texi
+++ b/gcc/doc/passes.texi
@@ -709,7 +709,8 @@ loop.
The pass is implemented in @file{tree-vectorizer.c} (the main driver),
@file{tree-vect-loop.c} and @file{tree-vect-loop-manip.c} (loop specific parts
and general loop utilities), @file{tree-vect-slp} (loop-aware SLP
-functionality), @file{tree-vect-stmts.c} and @file{tree-vect-data-refs.c}.
+functionality), @file{tree-vect-stmts.c}, @file{tree-vect-data-refs.c} and
+@file{tree-vect-slp-patterns.c} containing the SLP pattern matcher.
Analysis of data references is in @file{tree-data-ref.c}.
SLP Vectorization. This pass performs vectorization of straight-line code. The
diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
new file mode 100644
index 0000000000000000000000000000000000000000..f605f68d2a14c4bf4941f97b7c1d57f6acb5ffb1
--- /dev/null
+++ b/gcc/tree-vect-slp-patterns.c
@@ -0,0 +1,310 @@
+/* SLP - Pattern matcher on SLP trees
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "optabs-tree.h"
+#include "insn-config.h"
+#include "recog.h" /* FIXME: for insn_data */
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimple-iterator.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+#include "langhooks.h"
+#include "gimple-walk.h"
+#include "dbgcnt.h"
+#include "tree-vector-builder.h"
+#include "vec-perm-indices.h"
+#include "gimple-fold.h"
+#include "internal-fn.h"
+
+/* SLP Pattern matching mechanism.
+
+ This extension to the SLP vectorizer allows one to transform the generated SLP
+ tree based on any pattern. The difference between this and the normal vect
+ pattern matcher is that unlike the latter, this matcher allows you to match
+ with instructions that do not belong to the same SSA dominator graph.
+
+ The only requirement that this pattern matcher has is that you are
+ only allowed to either match an entire group or none.
+
+ As an example, the following simple loop:
+
+ double a[restrict N]; double b[restrict N]; double c[restrict N];
+
+ for (int i=0; i < N; i+=2)
+ {
+ c[i] = a[i] - b[i+1];
+ c[i+1] = a[i+1] + b[i];
+ }
+
+ which represents a complex addition with a rotation of 90 degrees in the
+ Argand plane, i.e. if `a` and `b` were complex numbers then this would be the
+ same as `a + (b * I)`.
+
+ Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
+ both recognized in order for the pattern to work. As an SLP tree this is
+ represented as
+
+ +--------------------------------+
+ | stmt 0 *_9 = _10; |
+ | stmt 1 *_15 = _16; |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | stmt 0 _10 = _4 - _8; |
+ | stmt 1 _16 = _12 + _14; |
+ | lane permutation { 0[0] 1[1] } |
+ +--------------------------------+
+ | |
+ | |
+ | |
+ +-----+ | | +-----+
+ | | | | | |
+ +-----| { } |<-----+ +----->| { } --------+
+ | | | +------------------| | |
+ | +-----+ | +-----+ |
+ | | | |
+ | | | |
+ | +------|------------------+ |
+ | | | |
+ v v v v
+ +--------------------------+ +--------------------------------+
+ | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
+ | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
+ | load permutation { 1 0 } | | load permutation { 0 1 } |
+ +--------------------------+ +--------------------------------+
+
+ The pattern matcher allows you to replace both statements 0 and 1 or none at
+ all. You are also allowed to replace and match on any number of nodes.
+
+ The pattern matcher uses a sliding window to handle unrolled cases. Every
+ pattern has to declare the number of statements that they consume. The
+ pattern matcher uses this to incrementally ask if the pattern can be applied.
+ This is done using the method `matches ()`.
+
+ If the pattern can be applied a VectPatternMatch is returned which contains all
+ state information on where the match was found. This is stored in a list of
+ operations to perform. If the match cannot be applied then the current
+ pattern is aborted and no changes made to the tree.
+
+ The pattern matcher has two modes:
+
+ 1) pre-order traversal is used to perform a check to see if the pattern can be
+ applied or not. If the pattern can be applied then a second step is
+ performed that allows the pattern to rewrite its children. This step is
+ required because the application of a pattern can change the layout of the
+ tree which affects the nodes that are still to be matched. This is
+ performed using `validate_p ()`.
+
+ 2) post-order traversal is used to actually perform the rewriting of the
+ matches found earlier. This is done by calling `build ()` on all matches
+ that were found earlier.
+
+ The pattern matcher currently only allows you to perform replacements to
+ internal functions.
+
+ To add a new pattern, implement the VectPattern class and add the type to
+ slp_patterns. */
+
+/* VectSimplePatternMatch holds contextual information about a single match
+ found in the SLP tree. The use of the class is to allow you to defer
+ performing any modifications to the SLP tree until they are to be done. By
+ calling build () the modifications are done in-place, which also allows the
+ root node to be rewritten. */
+
+class VectSimplePatternMatch : public VectPatternMatch
+{
+ protected:
+  uint8_t m_arity;
+  vec<stmt_vec_info> m_ifn_args;
+  internal_fn m_ifn;
+  vec_info *m_vinfo;
+  int m_idx, m_num_args;
+  tree m_type, m_vectype;
+  slp_tree m_node;
+  int m_pos;
+
+ public:
+  VectSimplePatternMatch (uint8_t arity, vec<stmt_vec_info> ifn_args,
+                          internal_fn ifn, vec_info *vinfo, int idx,
+                          slp_tree node, tree type, tree vectype,
+                          int num_args)
+  {
+    /* Number of statements the pattern matches against.  */
+    this->m_arity = arity;
+
+    /* Private copy of the arguments used to build the new IFN calls.  */
+    this->m_ifn_args = ifn_args.copy ();
+
+    /* The IFN to create the new statements with.  */
+    this->m_ifn = ifn;
+
+    /* The vectorization information the new statements are added to.  */
+    this->m_vinfo = vinfo;
+
+    /* The index in the sliding window where the statements were matched.  */
+    this->m_idx = idx;
+
+    /* The number of arguments required to create the new IFN.  */
+    this->m_num_args = num_args;
+
+    /* The original scalar type of the statement being replaced.  */
+    this->m_type = type;
+
+    /* The vector type to create the IFN for.  */
+    this->m_vectype = vectype;
+
+    /* The node that contains the statement that is being replaced.  */
+    this->m_node = node;
+
+    /* Position, within the M_ARITY statements of this match, of the next
+       statement that build () will create.  */
+    this->m_pos = 0;
+
+    gcc_assert ((unsigned)(num_args * arity) == ifn_args.length ());
+  }
+
+  uint8_t get_arity ()
+  {
+    return this->m_arity;
+  }
+
+  internal_fn get_IFN ()
+  {
+    return this->m_ifn;
+  }
+
+  const vec<stmt_vec_info> get_IFN_args ()
+  {
+    return this->m_ifn_args;
+  }
+
+  /* Create the replacement pattern statement for the next not yet rewritten
+     scalar statement of this match and install it in M_NODE.  The statement
+     is a call to internal function M_IFN whose M_NUM_ARGS arguments are the
+     LHS of the matching slice of M_IFN_ARGS.  Its result is a fresh SSA name
+     of scalar type M_TYPE and M_VECTYPE is the vector type passed on to
+     vect_mark_pattern_stmts.
+
+     Furthermore the new statement is added to M_VINFO, the statement it
+     replaces is marked as vect_unused_in_scope and the new statement is
+     flagged as SLP only and related to the statement it replaces.
+
+     The scalar statement slot, the representative and the tree code of
+     M_NODE are updated to refer to the new call.
+
+     Returns the newly created gimple call, or NULL once all M_ARITY
+     statements of the match have been built (or when there are no arguments
+     to build a call from).  This function does not insert the call in a BB.  */
+
+  gcall *build ()
+  {
+    stmt_vec_info stmt_info;
+
+    /* Check if this call was made too often.  */
+    if (this->m_pos >= this->m_arity)
+      return NULL;
+
+    auto_vec<tree> args;
+    args.create (this->m_num_args);
+
+    /* Create the argument set for use by gimple_build_call_internal_vec.  */
+    stmt_vec_info arg;
+    for (int i = 0; i < this->m_num_args; i++)
+      {
+        arg = this->m_ifn_args[i + (this->m_pos * this->m_num_args)];
+        args.quick_push (gimple_get_lhs (STMT_VINFO_STMT (arg)));
+      }
+
+    /* Nothing to build a call from when no argument was collected.  */
+    if (args.is_empty ())
+      return NULL;
+
+    /* Calculate the location of the statement in NODE to replace.  */
+    int entry = this->m_idx - (this->m_arity - 1) + this->m_pos;
+    stmt_info = SLP_TREE_SCALAR_STMTS (this->m_node)[entry];
+
+    /* Create the new pattern statements.  */
+    gcall *call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
+    tree var = make_temp_ssa_name (this->m_type, call_stmt, "slp_patt");
+    gimple *old_stmt = STMT_VINFO_STMT (stmt_info);
+    gimple_call_set_lhs (call_stmt, var);
+    gimple_set_location (call_stmt, gimple_location (old_stmt));
+    gimple_call_set_nothrow (call_stmt, true);
+
+    /* Adjust the book-keeping for the new and old statements for use during
+       SLP.  This is required to get the right VF and statement during SLP
+       analysis.  These changes are created after relevancy has been set for
+       the nodes as such we need to manually update them.  Any changes will be
+       undone if SLP is cancelled.  */
+    stmt_vec_info call_stmt_info = this->m_vinfo->add_stmt (call_stmt);
+    vect_mark_pattern_stmts (this->m_vinfo, stmt_info, call_stmt,
+                             this->m_vectype);
+
+    /* We have to explicitly mark the old statement as unused because during
+       statement analysis the original and new pattern statement may require
+       different level of unrolling.  As an example add/sub when vectorized
+       without a pattern requires 4 copies, whereas with a COMPLEX_ADD pattern
+       this only requires 2 copies and the two statement will be treated as
+       hand unrolled.  That means that the analysis won't happen as it'll find
+       a mismatch.  So we don't analyze the old statement and if we end up
+       needing it, e.g. SLP fails then we have to quickly re-analyze it.  */
+    STMT_VINFO_RELEVANT (stmt_info) = vect_unused_in_scope;
+    STMT_VINFO_SLP_VECT_ONLY (call_stmt_info) = true;
+    STMT_VINFO_RELATED_STMT (call_stmt_info) = stmt_info;
+
+    /* Since we are replacing all the statements in the group with the same
+       thing it doesn't really matter.  So just set it every time a new stmt
+       is created.  */
+    SLP_TREE_SCALAR_STMTS (this->m_node)[entry] = call_stmt_info;
+    SLP_TREE_REPRESENTATIVE (this->m_node) = call_stmt_info;
+    SLP_TREE_CODE (this->m_node) = gimple_expr_code (call_stmt);
+
+    this->m_pos++;
+    return call_stmt;
+  }
+
+  ~VectSimplePatternMatch ()
+  {
+    this->m_ifn_args.release ();
+  }
+};
+
+#define SLP_PATTERN(x) &x::create
+VectPatternDecl slp_patterns[]
+{
+ /* Pattern factories tried by the matcher (presumably listed as
+ SLP_PATTERN (Class)); none yet.  For least back-tracking order them from
+ the largest to the smallest, especially if they overlap in what they detect.  */
+};
+#undef SLP_PATTERN
+/* Number of entries in slp_patterns.  */
+size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(VectPatternDecl);
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 01189d44d892fc42b132bbb7de1c471df45518ae..947b031a6d492e6a02621dbcf41ba60d96c606f0 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2055,6 +2055,192 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
return exact_div (common_multiple (nunits, group_size), group_size);
}
+/* Helper function of vect_match_slp_patterns.
+
+   Attempts to match the pattern created by PATT_FN against the SLP tree
+   rooted in NODE using VINFO and GROUP_SIZE, then recurses into the children
+   of NODE.  MAX_NUNITS, MATCHES, NPERMUTES, TREE_SIZE and BST_MAP are handed
+   to the pattern's validate_p hook.  Returns true if the pattern was applied
+   to NODE or to any node below it; matched nodes are rewritten in place.  */
+
+static bool
+vect_match_slp_patterns_2 (slp_tree node, vec_info *vinfo,
+                           unsigned int group_size, VectPatternDecl patt_fn,
+                           poly_uint64 *max_nunits, bool *matches,
+                           unsigned *npermutes, unsigned *tree_size,
+                           scalar_stmts_to_slp_tree_map_t * bst_map)
+{
+  unsigned i;
+  if (!node)
+    return false;
+
+  vec<stmt_vec_info> scalar_stmts = SLP_TREE_SCALAR_STMTS (node);
+  bool found_p = false, found_rec_p = false;
+  VectPattern *pattern = patt_fn (node, vinfo);
+  uint8_t n = pattern->get_arity ();
+
+  /* Only whole groups can be matched; a zero arity cannot match anything.  */
+  if (n == 0 || group_size % n != 0)
+    {
+      delete pattern;
+      return false;
+    }
+
+  /* The data dependency orderings force the nodes to be created in the order
+     of their data flow, so a linear scan over windows of N statements is
+     enough to match the same instruction multiple times.  Only entire nodes
+     can be replaced, so the scan gives up on the first window that does not
+     match or that the target does not support.  */
+
+  for (unsigned idx = n - 1; idx < scalar_stmts.length (); idx += n)
+    {
+      stmt_vec_info stmt_info = scalar_stmts[idx];
+
+      if (gimple_assign_load_p (STMT_VINFO_STMT (stmt_info))
+          || gimple_store_p (STMT_VINFO_STMT (stmt_info))
+          || gimple_assign_cast_p (STMT_VINFO_STMT (stmt_info)))
+        break;
+
+      stmt_vec_info *stmt_infos = scalar_stmts.begin () + (idx - (n - 1));
+      gcc_assert (stmt_infos);
+
+      if (!pattern->matches (stmt_infos, idx))
+        {
+          /* We can only do replacements for entire groups, we must replace all
+             statements in a node as the argument list/children may not have
+             equal height then.  Operations that don't rewrite the arguments
+             may be safe to do, so perhaps parameterise it.  */
+
+          found_p = false;
+          break;
+        }
+
+      tree type = gimple_expr_type (STMT_VINFO_STMT (stmt_info));
+      tree vectype = get_vectype_for_scalar_type (vinfo, type, node);
+
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "Found %s pattern in SLP tree\n",
+                         pattern->get_name ());
+
+      if (pattern->is_optab_supported_p (vectype, OPTIMIZE_FOR_SPEED))
+        {
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_NOTE, vect_location,
+                             "Target supports %s vectorization with mode %T\n",
+                             internal_fn_name (pattern->get_last_ifn ()),
+                             vectype);
+
+          found_p = true;
+        }
+      else
+        {
+          if (dump_enabled_p ())
+            {
+              if (!vectype)
+                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                 "Target does not support vector type for "
+                                 "%T\n", type);
+              else
+                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                 "Target does not support %s for "
+                                 "vector type %T\n",
+                                 internal_fn_name (pattern->get_last_ifn ()),
+                                 vectype);
+            }
+          found_p = false;
+          break;
+        }
+    }
+
+  if (found_p)
+    {
+      /* Find which nodes should be the children of the new node.  */
+      if (!pattern->validate_p (max_nunits, matches,
+                                npermutes, tree_size, bst_map))
+        {
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "transformation for %s not valid due to post "
+                             "condition\n",
+                             internal_fn_name (pattern->get_last_ifn ()));
+          found_p = false;
+        }
+    }
+
+  /* Perform recursive matching, it's important to do this after matching things
+     in the current node as the matches here may re-order the nodes below it.
+     As such the pattern that needs to be subsequently match may change.  */
+  if (SLP_TREE_CHILDREN (node).exists ())
+    {
+      slp_tree child;
+      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+        found_rec_p |= vect_match_slp_patterns_2 (child, vinfo, group_size,
+                                                  patt_fn, max_nunits, matches,
+                                                  npermutes, tree_size, bst_map);
+    }
+
+  if (found_p)
+    {
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location, "Creating vec patterns\n");
+
+      while (gcall *call_stmt = pattern->build ())
+        {
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_NOTE, vect_location, "\t %p stmt: %G",
+                             node, call_stmt);
+        }
+
+      vect_mark_slp_stmts_relevant (node);
+    }
+
+  delete pattern;
+  return found_p || found_rec_p;
+}
+
+/* Run every pattern registered in slp_patterns, in order, over the SLP tree
+   rooted in NODE using vec_info VINFO and group size GROUP_SIZE.
+
+   MAX_NUNITS, MATCHES, NPERMUTES, TREE_SIZE and BST_MAP are forwarded
+   unchanged to vect_match_slp_patterns_2.  More than one pattern may match.
+   Returns true if at least one pattern was applied somewhere in the tree.  */
+
+static bool
+vect_match_slp_patterns (slp_tree node, vec_info *vinfo,
+                         unsigned int group_size, poly_uint64 *max_nunits,
+                         bool *matches, unsigned *npermutes,
+                         unsigned *tree_size,
+                         scalar_stmts_to_slp_tree_map_t * bst_map)
+{
+  DUMP_VECT_SCOPE ("vect_match_slp_patterns");
+  bool any_match = false;
+
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "-- before patt match --\n");
+      vect_print_slp_graph (MSG_NOTE, vect_location, node);
+      dump_printf_loc (MSG_NOTE, vect_location, "-- end patt --\n");
+    }
+
+  for (unsigned pat = 0; pat != num__slp_patterns; ++pat)
+    if (vect_match_slp_patterns_2 (node, vinfo, group_size, slp_patterns[pat],
+                                   max_nunits, matches, npermutes, tree_size,
+                                   bst_map))
+      any_match = true;
+
+  /* TODO: Remove in final version, only here for generating debug dot graphs
+     from SLP tree.  */
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "-- start dot --\n");
+      vect_print_slp_graph (MSG_NOTE, vect_location, node);
+      dump_printf_loc (MSG_NOTE, vect_location, "-- end dot --\n");
+    }
+
+  return any_match;
+}
+
/* Analyze an SLP instance starting from a group of grouped stores. Call
vect_build_slp_tree to build a tree of packed stmts if possible.
Return FALSE if it's impossible to SLP any stmt in the loop. */
@@ -2192,6 +2378,17 @@ vect_analyze_slp_instance (vec_info *vinfo,
&tree_size, bst_map);
if (node != NULL)
{
+ /* Temporarily allow add_stmt calls again. */
+ vinfo->stmt_vec_info_ro = false;
+
+ /* See if any patterns can be found in the constructed SLP tree
+ before we do any analysis on it. */
+ vect_match_slp_patterns (node, vinfo, group_size, &max_nunits,
+ matches, &npermutes, &tree_size, bst_map);
+
+ /* After this no more add_stmt calls are allowed. */
+ vinfo->stmt_vec_info_ro = true;
+
/* Calculate the unrolling factor based on the smallest type. */
poly_uint64 unrolling_factor
= calculate_unrolling_factor (max_nunits, group_size);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 79926f1a43534635ddca85556a928e364022c40a..95bbf13b1c733c07b7deb8515c1b17c6979cff21 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -26,6 +26,7 @@ typedef class _stmt_vec_info *stmt_vec_info;
#include "tree-data-ref.h"
#include "tree-hash-traits.h"
#include "target.h"
+#include "internal-fn.h"
/* Used for naming of new temporaries. */
@@ -2100,6 +2101,99 @@ typedef hash_map , slp_tree,
simple_hashmap_traits >
scalar_stmts_to_slp_tree_map_t;
+/* SLP Pattern matcher types, tree-vect-slp-patterns.c.  VectPatternMatch is
+   the interface of a single match; build () returns NULL when exhausted.  */
+class VectPatternMatch
+{
+ public:
+  virtual gcall *build () = 0;
+  virtual internal_fn get_IFN () = 0;
+  virtual const vec<stmt_vec_info> get_IFN_args () = 0;
+  virtual uint8_t get_arity () = 0;
+  virtual ~VectPatternMatch () {}
+};
+
+class VectPattern
+{
+ protected:
+  uint8_t m_arity;
+  uint8_t m_num_args;
+  internal_fn m_last_ifn;
+  int m_last_idx;
+  slp_tree m_node;
+  vec_info *m_vinfo;
+  vec<VectPatternMatch *> m_matches;
+
+  /* Derived patterns are presumably expected to set M_ARITY and M_NUM_ARGS;
+     they start out as zero here so they never hold indeterminate values.  */
+  VectPattern (slp_tree node, vec_info *vinfo)
+    : m_arity (0), m_num_args (0), m_last_ifn (IFN_LAST), m_last_idx (-1),
+      m_node (node), m_vinfo (vinfo), m_curr_match (0)
+  {
+    this->m_matches.create (0);
+  }
+
+ private:
+  unsigned m_curr_match;
+
+ public:
+  static VectPattern* create (slp_tree node, vec_info *vinfo);
+  virtual bool matches (stmt_vec_info *stmts, int idx) = 0;
+
+  virtual const char* get_name () = 0;
+  virtual ~VectPattern ()
+  {
+    unsigned i;
+    VectPatternMatch *match;
+    FOR_EACH_VEC_ELT (this->m_matches, i, match)
+      delete match;
+    this->m_matches.release ();
+  }
+
+  /* Return the next replacement call from the recorded matches, moving on to
+     the following match when one is exhausted; NULL when all are done.  */
+  virtual gcall *build ()
+  {
+    while (this->m_curr_match < this->m_matches.length ())
+      {
+        gcall *entry = this->m_matches[this->m_curr_match]->build ();
+        if (entry)
+          return entry;
+        this->m_curr_match++;
+      }
+    return NULL;
+  }
+
+  virtual bool validate_p (poly_uint64 *, bool *, unsigned *, unsigned *,
+                           scalar_stmts_to_slp_tree_map_t *)
+  {
+    return true;
+  }
+
+  virtual uint8_t get_arity ()
+  {
+    return this->m_arity;
+  }
+
+  virtual bool is_optab_supported_p (tree vectype, optimization_type opt_type)
+  {
+    if (!vectype)
+      return false;
+
+    return direct_internal_fn_supported_p (this->m_last_ifn, vectype,
+                                           opt_type);
+  }
+
+  internal_fn get_last_ifn ()
+  {
+    return this->m_last_ifn;
+  }
+};
+/* Factory signature of a pattern and the pattern registry with its size.  */
+typedef VectPattern* (*VectPatternDecl) (slp_tree, vec_info *);
+extern VectPatternDecl slp_patterns[];
+extern size_t num__slp_patterns;
+
extern void
vect_free_slp_tree (slp_tree node);