diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 5e3cbe5d736e2cf9429df989d9d95708b1243011..ca2415b79de60c9be44871777d57bcc8d5c07487 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1295,6 +1295,7 @@ OBJS = \
gimple-ssa-isolate-paths.o \
gimple-ssa-nonnull-compare.o \
gimple-ssa-split-paths.o \
+ gimple-ssa-store-merging.o \
gimple-ssa-strength-reduction.o \
gimple-streamer-in.o \
gimple-streamer-out.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 2004fbc7d0d7b1ca65d06cee0f7398f2c9e3fc64..d951ae7cd6fe8fdae17e17b8f5be7c636ea1986d 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1452,6 +1452,10 @@ fstrict-volatile-bitfields
Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization
Force bitfield accesses to match their type width.
+fstore-merging
+Common Var(flag_store_merging) Optimization
+Use the tree store merging pass.
+
fguess-branch-probability
Common Report Var(flag_guess_branch_prob) Optimization
Enable guessing of branch probabilities.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 87da1f1c12b718fa63c9b89fdd8f85fbc6b54cb0..2e2a9fdd92733beb6948efc7a5c4b3fc79da73a9 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -401,7 +401,7 @@ Objective-C and Objective-C++ Dialects}.
-fsingle-precision-constant -fsplit-ivs-in-unroller @gol
-fsplit-paths @gol
-fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
--fstdarg-opt -fstrict-aliasing @gol
+-fstdarg-opt -fstore-merging -fstrict-aliasing @gol
-fstrict-overflow -fthread-jumps -ftracer -ftree-bit-ccp @gol
-ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol
-ftree-coalesce-vars -ftree-copy-prop -ftree-dce -ftree-dominator-opts @gol
@@ -412,8 +412,8 @@ Objective-C and Objective-C++ Dialects}.
-ftree-loop-vectorize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
--ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
--ftree-vectorize -ftree-vrp -funconstrained-commons @gol
+-ftree-switch-conversion -ftree-tail-merge @gol
+-ftree-ter -ftree-vectorize -ftree-vrp -funconstrained-commons @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-math-optimizations -funswitch-loops @gol
-fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt @gol
@@ -6339,6 +6339,7 @@ compilation time.
-fsplit-wide-types @gol
-fssa-backprop @gol
-fssa-phiopt @gol
+-fstore-merging @gol
-ftree-bit-ccp @gol
-ftree-ccp @gol
-ftree-ch @gol
@@ -7673,6 +7674,13 @@ Perform scalar replacement of aggregates. This pass replaces structure
references with scalars to prevent committing structures to memory too
early. This flag is enabled by default at @option{-O} and higher.
+@item -fstore-merging
+@opindex fstore-merging
+Perform merging of narrow stores to consecutive memory addresses. This pass
+merges contigous stores of immediate values narrower than a word into fewer
+wider stores to reduce the number of instructions. This is enabled by default
+at @option{-O} and higher.
+
@item -ftree-ter
@opindex ftree-ter
Perform temporary expression replacement during the SSA->normal phase. Single
diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
new file mode 100644
index 0000000000000000000000000000000000000000..36e2e2a1e6bae3992c3ce84edbb5067dfa504f02
--- /dev/null
+++ b/gcc/gimple-ssa-store-merging.c
@@ -0,0 +1,1038 @@
+/* GIMPLE store merging pass.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ . */
+
+/* The purpose of this pass is to combine multiple memory stores of
+ constant values to consecutive memory locations into fewer wider stores.
+ For example, if we have a sequence peforming four byte stores to
+ consecutive memory locations:
+ [p ] := imm1;
+ [p + 1B] := imm2;
+ [p + 2B] := imm3;
+ [p + 3B] := imm4;
+ we can transform this into a single 4-byte store if the target supports it:
+ [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.
+
+ The algorithm is applied to each basic block in three phases:
+
+ 1) Scan through the basic block recording constant assignments to
+ destinations that can be expressed as a store to memory of a certain size
+ at a certain bit offset. Record store chains to different bases in a
+ hash_map (m_stores) and make sure to terminate such chains when appropriate
+ (for example when when the stored values get used subsequently).
+ These stores can be a result of structure element initializers, array stores
+ etc. A store_immediate_info object is recorded for every such store.
+ Record as many such assignments to a single base as possible until a
+ statement that interferes with the store sequence is encountered.
+
+ 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
+ store_immediate_info objects) and coalesce contiguous stores into
+ merged_store_group objects.
+
+ For example, given the stores:
+ [p ] := 0;
+ [p + 1B] := 1;
+ [p + 3B] := 0;
+ [p + 4B] := 1;
+ [p + 5B] := 0;
+ [p + 6B] := 0;
+ This phase would produce two merged_store_group objects, one recording the
+ two bytes stored in the memory region [p : p + 1] and another
+ recording the four bytes stored in the memory region [p + 3 : p + 6].
+
+ 3) The merged_store_group objects produced in phase 2) are processed
+ to generate the sequence of wider stores that set the contiguous memory
+ regions to the sequence of bytes that correspond to it. This may emit
+ multiple stores per store group to handle contiguous stores that are not
+ of a size that is a power of 2. For example it can try to emit a 40-bit
+ store as a 32-bit store followed by an 8-bit store.
+ We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or
+ SLOW_UNALIGNED_ACCESS rules.
+
+ Note on endianness and example:
+ Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
+ [p ] := 0x1234;
+ [p + 2B] := 0x5678;
+ [p + 4B] := 0xab;
+ [p + 5B] := 0xcd;
+
+ The memory layout for little-endian (LE) and big-endian (BE) must be:
+ p |LE|BE|
+ ---------
+ 0 |34|12|
+ 1 |12|34|
+ 2 |78|56|
+ 3 |56|78|
+ 4 |ab|ab|
+ 5 |cd|cd|
+
+ To merge these into a single 48-bit merged value 'val' in phase 2)
+ on little-endian we insert stores to higher (consecutive) bitpositions
+ into the most significant bits of the merged value.
+ The final merged value would be: 0xcdab56781234
+
+ For big-endian we insert stores to higher bitpositions into the least
+ significant bits of the merged value.
+ The final merged value would be: 0x12345678abcd
+
+ Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
+ followed by a 16-bit store. Again, we must consider endianness when
+ breaking down the 48-bit value 'val' computed above.
+ For little endian we emit:
+ [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
+ [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
+
+ Whereas for big-endian we emit:
+ [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
+ [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "builtins.h"
+#include "fold-const.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "gimple-pretty-print.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "params.h"
+#include "print-tree.h"
+#include "tree-hash-traits.h"
+#include "gimple-iterator.h"
+#include "gimplify.h"
+#include "target.h"
+
+/* The maximum size (in bits) of the stores this pass should generate. */
+#define MAX_STORE_BITSIZE (BITS_PER_WORD)
+#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
+
+namespace {
+
+/* Struct recording the information about a single store of an immediate
+ to memory. These are created in the first phase and coalesced into
+ merged_store_group objects in the second phase. */
+
+struct store_immediate_info
+{
+ HOST_WIDE_INT bitsize;
+ HOST_WIDE_INT bitpos;
+ tree val;
+ tree dest;
+ gimple *stmt;
+ unsigned int order;
+ store_immediate_info (HOST_WIDE_INT, HOST_WIDE_INT, tree, tree, gimple *,
+ unsigned int);
+};
+
+store_immediate_info::store_immediate_info (HOST_WIDE_INT bs, HOST_WIDE_INT bp,
+ tree v, tree d, gimple *st,
+ unsigned int ord)
+ : bitsize (bs), bitpos (bp), val (v), dest (d), stmt (st), order (ord)
+{
+}
+
+/* Struct representing a group of stores to contiguous memory locations.
+ These are produced by the second phase (coalescing) and consumed in the
+ third phase that outputs the widened stores. */
+
+struct merged_store_group
+{
+ HOST_WIDE_INT start;
+ HOST_WIDE_INT width;
+ unsigned char *val;
+ unsigned int align;
+ auto_vec stores;
+ /* We record the first and last original statements in the sequence because
+ because we'll need their vuse/vdef and replacement position. */
+ gimple *last_stmt;
+ unsigned int last_order;
+
+ gimple *first_stmt;
+ unsigned int first_order;
+ merged_store_group (store_immediate_info *);
+ ~merged_store_group ();
+ void merge_into (store_immediate_info *);
+ void merge_overlapping (store_immediate_info *);
+ void apply_stores ();
+};
+
+/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
+
+static void
+dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
+{
+ if (!fd)
+ return;
+
+ for (unsigned int i = 0; i < len; i++)
+ fprintf (fd, "%x ", ptr[i]);
+ fprintf (fd, "\n");
+}
+
+/* Write BITLEN bits of EXPR to the byte array PTR at
+ bit position BITPOS. PTR should contain MAX_STORE_BYTES elements. */
+
+static void
+encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos)
+{
+ unsigned char *tmpbuf = XALLOCAVEC (unsigned char, MAX_STORE_BYTES);
+ memset (tmpbuf, 0, MAX_STORE_BYTES);
+ int last_byte = (bitpos + bitlen) / BITS_PER_UNIT;
+ gcc_assert (last_byte <= MAX_STORE_BYTES);
+ native_encode_expr (expr, tmpbuf, MAX_STORE_BYTES, 0);
+ tree read_back_type
+ = build_nonstandard_integer_type (TYPE_PRECISION (TREE_TYPE (expr)),
+ UNSIGNED);
+ tree tmp_int
+ = native_interpret_expr (read_back_type, tmpbuf, MAX_STORE_BYTES);
+
+ gcc_assert (tmp_int);
+
+ tree dest_int_type
+ = build_nonstandard_integer_type (MAX_STORE_BITSIZE, UNSIGNED);
+ tree ptr_wide_int
+ = native_interpret_expr (dest_int_type, ptr, MAX_STORE_BYTES);
+ wide_int dest_wide_int
+ = wi::to_wide (ptr_wide_int, TYPE_PRECISION (dest_int_type));
+ wide_int expr_wide_int = wi::to_wide (tmp_int, MAX_STORE_BITSIZE);
+ if (BYTES_BIG_ENDIAN)
+ dest_wide_int = wi::insert (dest_wide_int, expr_wide_int,
+ MAX_STORE_BITSIZE - bitlen - bitpos, bitlen);
+ else
+ dest_wide_int = wi::insert (dest_wide_int, expr_wide_int, bitpos, bitlen);
+
+ tree res = wide_int_to_tree (dest_int_type, dest_wide_int);
+
+ native_encode_expr (res, ptr, MAX_STORE_BYTES, 0);
+}
+
+/* Sorting function for store_immediate_info objects.
+ Sorts them by bitposition. */
+
+static int
+sort_by_bitpos (const void *x, const void *y)
+{
+ store_immediate_info *const *tmp = (store_immediate_info * const *) x;
+ store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
+
+ if ((*tmp)->bitpos <= (*tmp2)->bitpos)
+ return -1;
+ else if ((*tmp)->bitpos > (*tmp2)->bitpos)
+ return 1;
+
+ gcc_unreachable ();
+ return 0;
+}
+
+/* Sorting function for store_immediate_info objects.
+ Sorts them by the order field. */
+
+static int
+sort_by_order (const void *x, const void *y)
+{
+ store_immediate_info *const *tmp = (store_immediate_info * const *) x;
+ store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
+
+ if ((*tmp)->order < (*tmp2)->order)
+ return -1;
+ else if ((*tmp)->order > (*tmp2)->order)
+ return 1;
+
+ gcc_unreachable ();
+ return 0;
+}
+
+/* Initialize a merged_store_group object from a store_immediate_info
+ object. */
+
+merged_store_group::merged_store_group (store_immediate_info *info)
+{
+ start = info->bitpos;
+ width = info->bitsize;
+ val = new unsigned char[MAX_STORE_BYTES];
+ align = get_object_alignment (info->dest);
+ stores.create (1);
+ stores.safe_push (info);
+ last_stmt = info->stmt;
+ last_order = info->order;
+ first_stmt = last_stmt;
+ first_order = last_order;
+}
+
+merged_store_group::~merged_store_group ()
+{
+ delete[] val;
+}
+
+/* Merge a store recorded by INFO into this merged store.
+ The store is not overlapping with the existing recorded
+ stores. */
+
+void
+merged_store_group::merge_into (store_immediate_info *info)
+{
+ HOST_WIDE_INT wid = info->bitsize;
+ /* Make sure we're inserting in the position we think we're inserting. */
+ gcc_assert (info->bitpos == start + width);
+
+ width += wid;
+ gimple *stmt = info->stmt;
+ stores.safe_push (info);
+ if (info->order > last_order)
+ {
+ last_order = info->order;
+ last_stmt = stmt;
+ }
+ else if (info->order < first_order)
+ {
+ first_order = info->order;
+ first_stmt = stmt;
+ }
+}
+
+/* Merge a store described by INFO into this merged store.
+ INFO overlaps in some way with the current store (i.e. it's not contiguous
+ which is handled by merged_store_group::merge_into). */
+
+void
+merged_store_group::merge_overlapping (store_immediate_info *info)
+{
+ gimple *stmt = info->stmt;
+ stores.safe_push (info);
+
+ /* If the store extends the size of the group, extend the width. */
+ if ((info->bitpos + info->bitsize) > (start + width))
+ width += info->bitpos + info->bitsize - (start + width);
+
+ if (info->order > last_order)
+ {
+ last_order = info->order;
+ last_stmt = stmt;
+ }
+ else if (info->order < first_order)
+ {
+ first_order = info->order;
+ first_stmt = stmt;
+ }
+}
+
+/* Go through all the recorded stores in this group in program order and
+ apply their values to the VAL byte array to create the final merged
+ value. */
+
+void
+merged_store_group::apply_stores ()
+{
+ stores.qsort (sort_by_order);
+ struct store_immediate_info *info;
+ unsigned int i;
+ memset (val, 0, MAX_STORE_BYTES);
+
+ FOR_EACH_VEC_ELT (stores, i, info)
+ {
+ unsigned int pos_in_buffer = info->bitpos - start;
+ encode_tree_to_bitpos (info->val, val, info->bitsize, pos_in_buffer);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Afer writing ");
+ print_generic_expr (dump_file, info->val, 0);
+ fprintf (dump_file, " at position %d the merged region contains:\n",
+ pos_in_buffer);
+ dump_char_array (dump_file, val, MAX_STORE_BYTES);
+ }
+ }
+}
+
+/* Structure describing the store chain. */
+
+struct imm_store_chain_info
+{
+ auto_vec m_store_info;
+ auto_vec m_merged_store_groups;
+
+ bool terminate_and_process_chain (tree);
+ bool coalesce_immediate_stores ();
+ bool output_merged_store (tree, merged_store_group *);
+ bool output_merged_stores (tree);
+};
+
+const pass_data pass_data_tree_store_merging = {
+ GIMPLE_PASS, /* type */
+ "store-merging", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_store_merging : public gimple_opt_pass
+{
+public:
+ pass_store_merging (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_tree_store_merging, ctxt)
+ {
+ }
+
+ /* Pass not supported for PDP-endianness. */
+ virtual bool
+ gate (function *)
+ {
+ return flag_store_merging && optimize
+ && (WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN);
+ }
+
+ virtual unsigned int execute (function *);
+
+private:
+ hash_map m_stores;
+
+ bool terminate_and_process_all_chains ();
+ bool terminate_all_aliasing_chains (tree, tree, gimple *);
+ bool terminate_and_release_chain (tree);
+}; // class pass_store_merging
+
+/* Terminate and process all recorded chains. Return true if any changes
+ were made. */
+
+bool
+pass_store_merging::terminate_and_process_all_chains ()
+{
+ hash_map::iterator iter
+ = m_stores.begin ();
+ bool ret = false;
+ for (; iter != m_stores.end (); ++iter)
+ ret |= terminate_and_release_chain ((*iter).first);
+
+ return ret;
+}
+
+/* Terminate all chains that are affected by the assignment to DEST, appearing
+ in statement STMT and ultimately points to the object BASE. Return true if
+ at least one aliasing chain was terminated. BASE and DEST are allowed to
+ be NULL_TREE. In that case the aliasing checks are performed on the whole
+ statement rather than a particular operand in it. */
+
+bool
+pass_store_merging::terminate_all_aliasing_chains (tree dest, tree base,
+ gimple *stmt)
+{
+ bool ret = false;
+
+ /* If the statement doesn't touch memory it can't alias. */
+ if (!gimple_vuse (stmt))
+ return false;
+
+ struct imm_store_chain_info **chain_info = NULL;
+
+ if (base)
+ {
+ chain_info = m_stores.get (base);
+ if (chain_info)
+ {
+ struct store_immediate_info *info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
+ {
+ if (refs_may_alias_p (info->dest, dest))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "stmt causes chain termination:\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ }
+ terminate_and_release_chain (base);
+ ret = true;
+ break;
+ }
+ }
+ }
+ }
+
+ hash_map::iterator iter
+ = m_stores.begin ();
+
+ for (; iter != m_stores.end (); ++iter)
+ {
+ if (chain_info && (*chain_info) == (*iter).second)
+ continue;
+
+ tree key = (*iter).first;
+ if (ref_maybe_used_by_stmt_p (stmt, key)
+ || stmt_may_clobber_ref_p (stmt, key))
+ {
+ terminate_and_release_chain (key);
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
+/* Helper function. Terminate the recorded chain storing to base object
+ BASE. Return true if the merging and output was successful. The m_stores
+ entry is removed after the processing in any case. */
+
+bool
+pass_store_merging::terminate_and_release_chain (tree base)
+{
+ struct imm_store_chain_info **chain_info = m_stores.get (base);
+
+ if (!chain_info)
+ return false;
+
+ gcc_assert (*chain_info);
+
+ bool ret = (*chain_info)->terminate_and_process_chain (base);
+ delete *chain_info;
+ m_stores.remove (base);
+
+ return ret;
+}
+
+/* Go through the candidate stores recorded in m_store_info and merge them
+ into merged_store_group objects recorded into m_merged_store_groups
+ representing the widened stores. Return true if coalescing was successful
+ and the number of widened stores is fewer than the original number
+ of stores. */
+
+bool
+imm_store_chain_info::coalesce_immediate_stores ()
+{
+ /* Anything less can't be processed. */
+ if (m_store_info.length () < 2)
+ return false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
+ m_store_info.length ());
+
+ store_immediate_info *info;
+ unsigned int i;
+
+ /* Order the stores by the bitposition they write to. */
+ m_store_info.qsort (sort_by_bitpos);
+
+ info = m_store_info[0];
+ merged_store_group *merged_store = new merged_store_group (info);
+ m_merged_store_groups.safe_push (merged_store);
+
+ FOR_EACH_VEC_ELT (m_store_info, i, info)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
+ " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
+ i, info->bitsize, info->bitpos);
+ print_generic_expr (dump_file, info->val, 0);
+ fprintf (dump_file, "\n------------\n");
+ }
+
+ if (i == 0)
+ continue;
+
+ unsigned int prev_size = merged_store->width;
+
+ /* |---store 1---|
+ |---store 2---|
+ Partially overlapping stores are not handled at the moment. */
+ HOST_WIDE_INT start = info->bitpos;
+ if (IN_RANGE (start, merged_store->start,
+ merged_store->start + merged_store->width - 1))
+ {
+ if (start + info->bitsize - merged_store->start > MAX_STORE_BITSIZE)
+ return false;
+
+ merged_store->merge_overlapping (info);
+ continue;
+ }
+
+ /* |---store 1---| |---store 2---|.
+ Gap between stores. Start a new group. */
+ if (start != merged_store->start + merged_store->width)
+ {
+ merged_store = new merged_store_group (info);
+ m_merged_store_groups.safe_push (merged_store);
+ continue;
+ }
+
+ /* |---store 1---||---store 2---|
+ This store is consecutive to the previous one. */
+
+ /* If we will be exceeding the maximum store size start a new group but
+ record the alignment of the new store appropriately. */
+ if (prev_size + info->bitsize > MAX_STORE_BITSIZE)
+ {
+ merged_store_group *new_merged_store = new merged_store_group (info);
+ new_merged_store->align
+ = MIN (merged_store->align, merged_store->width);
+ merged_store = new_merged_store;
+ m_merged_store_groups.safe_push (merged_store);
+ continue;
+ }
+ /* Merge it into the current store group. */
+ else
+ {
+ merged_store->merge_into (info);
+ continue;
+ }
+
+ gcc_unreachable ();
+ return false;
+ }
+
+ gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
+ bool success = m_merged_store_groups.length () < m_store_info.length ();
+ if (dump_file)
+ {
+ if (success)
+ fprintf (dump_file, "Coalescing successful!\n"
+ "Merged into %u stores\n",
+ m_merged_store_groups.length ());
+ }
+
+ if (success)
+ {
+ FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
+ {
+ merged_store->apply_stores ();
+ }
+ }
+ return success;
+}
+
+/* Return the type to use for the merged stores described by GROUP.
+ This is needed to get the alias sets right. */
+
+static tree
+get_type_for_merged_store (merged_store_group *group)
+{
+ struct store_immediate_info *store;
+ unsigned int i;
+ tree lhs = gimple_assign_lhs (group->stores[0]->stmt);
+ tree type = reference_alias_ptr_type (lhs);
+ FOR_EACH_VEC_ELT (group->stores, i, store)
+ {
+ gimple *stmt = store->stmt;
+ if (i == 0)
+ continue;
+
+ lhs = gimple_assign_lhs (stmt);
+ tree type1 = reference_alias_ptr_type (lhs);
+ if (!alias_ptr_types_compatible_p (type, type1))
+ return ptr_type_node;
+ }
+ return type;
+}
+
+/* Return the location_t information we can find among the statements
+ in GROUP. */
+
+static location_t
+get_merged_store_location (merged_store_group *group)
+{
+ struct store_immediate_info *store;
+ unsigned int i;
+
+ FOR_EACH_VEC_ELT (group->stores, i, store)
+ {
+ gimple *stmt = store->stmt;
+ if (gimple_has_location (stmt))
+ return gimple_location (stmt);
+ }
+ return UNKNOWN_LOCATION;
+}
+
+/* Return true if storing an integer of bitsize SIZE using an unaligned
+ access if prohibitively slow. */
+
+static bool
+slow_unaligned_size_access (unsigned int size)
+{
+ machine_mode mode = mode_for_size (size, MODE_INT, 0);
+ gcc_assert (mode != BLKmode);
+ return SLOW_UNALIGNED_ACCESS (mode, GET_MODE_ALIGNMENT (mode));
+}
+
+/* Given a merged store group GROUP output the widened version of it.
+ The store chain is against the base object BASE.
+ Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
+ unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
+ Make sure that the number of statements output is less than the number of
+ original statements. If a better sequence is possible emit it and
+ return true. See note on endianness at top of file for an explanation of
+ the BYTES_BIG_ENDIAN logic. */
+
+bool
+imm_store_chain_info::output_merged_store (tree base, merged_store_group *group)
+{
+ HOST_WIDE_INT pos = group->start;
+ HOST_WIDE_INT size = group->width;
+ HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
+ unsigned int align = group->align;
+ gcc_assert (IN_RANGE (size, 0, MAX_STORE_BITSIZE));
+
+ unsigned int orig_num_stmts = group->stores.length ();
+ if (orig_num_stmts < 2)
+ return false;
+
+ /* We don't handle partial bitfields for now. */
+ if ((size % BITS_PER_UNIT != 0) || (pos % BITS_PER_UNIT != 0))
+ return false;
+
+ bool allow_unaligned
+ = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
+ unsigned int try_size = MAX_STORE_BITSIZE;
+ while (try_size > size
+ || ((!allow_unaligned || slow_unaligned_size_access (try_size))
+ && try_size > align))
+ {
+ try_size /= 2;
+ if (try_size < BITS_PER_UNIT)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Failed to output merged store.\n"
+ "Failed to find starting size meeting"
+ " alignment requirements.\n");
+ }
+ return false;
+ }
+ }
+
+ HOST_WIDE_INT try_pos = bytepos;
+ int wi_offset = 0;
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "Trying to output merged store at pos " HOST_WIDE_INT_PRINT_DEC
+ ", of size " HOST_WIDE_INT_PRINT_DEC ", "
+ "starting size %u and alignment %u\n",
+ pos, size, try_size, align);
+ }
+
+ gimple_seq seq = NULL;
+ unsigned int num_stmts = 0;
+ tree offset_type = get_type_for_merged_store (group);
+ tree last_vdef, new_vuse;
+ last_vdef = gimple_vdef (group->last_stmt);
+ new_vuse = gimple_vuse (group->last_stmt);
+ location_t loc = get_merged_store_location (group);
+ gimple *stmt = NULL;
+ gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
+
+ while (size > 0)
+ {
+ tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
+ SET_TYPE_ALIGN (int_type, align);
+ tree addr = build_fold_addr_expr (base);
+ tree dest = fold_build2 (MEM_REF, int_type, addr,
+ build_int_cst (offset_type, try_pos));
+
+ tree src = native_interpret_expr (int_type,
+ group->val + wi_offset / BITS_PER_UNIT,
+ try_size / BITS_PER_UNIT);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Output src is:\n");
+ print_generic_expr (dump_file, src, 0);
+ fprintf (dump_file, "\n");
+ }
+ stmt = gimple_build_assign (dest, src);
+ gimple_set_vuse (stmt, new_vuse);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ gimple_set_bb (stmt, gsi_bb (last_gsi));
+
+ num_stmts++;
+
+ try_pos += try_size / BITS_PER_UNIT;
+
+ wi_offset += try_size;
+ size -= try_size;
+ align = try_size;
+ while (size < try_size)
+ try_size /= 2;
+
+ if (num_stmts >= orig_num_stmts)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Exceeded original number of stmts (%u)."
+ " Not profitable to emit new sequence.\n",
+ orig_num_stmts);
+ }
+ return false;
+ }
+
+ tree new_vdef
+ = size ? make_ssa_name (gimple_vop (cfun), stmt) : last_vdef;
+ gimple_set_vdef (stmt, new_vdef);
+ SSA_NAME_DEF_STMT (new_vdef) = stmt;
+ new_vuse = new_vdef;
+ }
+ gcc_assert (size == 0);
+ gcc_assert (seq);
+ annotate_all_with_location (seq, loc);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "New sequence of %u stmts to replace old one of %u stmts\n",
+ num_stmts, orig_num_stmts);
+ if (dump_flags & TDF_DETAILS)
+ print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
+ }
+ gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
+
+ return true;
+}
+
+/* Process the merged_store_group objects created in the coalescing phase.
+ The stores are all against the base object BASE.
+ Try to output the widened stores and delete the original statements if
+ successful. Return true iff any changes were made. */
+
+bool
+imm_store_chain_info::output_merged_stores (tree base)
+{
+ unsigned int i;
+ merged_store_group *merged_store;
+ bool ret = false;
+ FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
+ {
+ bool successful_p = output_merged_store (base, merged_store);
+ if (successful_p)
+ {
+ unsigned int j;
+ store_immediate_info *store;
+ FOR_EACH_VEC_ELT (merged_store->stores, j, store)
+ {
+ gimple *stmt = store->stmt;
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gsi_remove (&gsi, true);
+ if (stmt != merged_store->last_stmt)
+ {
+ unlink_stmt_vdef (stmt);
+ release_defs (stmt);
+ }
+ }
+ }
+ ret |= successful_p;
+ }
+ if (ret && dump_file)
+ fprintf (dump_file, "Merging successful!\n");
+
+ return ret;
+}
+
+/* Coalesce the store_immediate_info objects recorded against the base object
+ BASE in the first phase and output them.
+ Delete the allocated structures.
+ Return true if any changes were made. */
+
+bool
+imm_store_chain_info::terminate_and_process_chain (tree base)
+{
+ /* Process store chain. */
+ bool ret = false;
+ if (m_store_info.length () > 1)
+ {
+ ret = coalesce_immediate_stores ();
+ if (ret)
+ ret = output_merged_stores (base);
+ }
+
+ /* Delete all the entries we allocated ourselves. */
+ store_immediate_info *info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (m_store_info, i, info)
+ delete info;
+
+ merged_store_group *merged_info;
+ FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
+ delete merged_info;
+
+ return ret;
+}
+
+/* Return true iff LHS is a destination potentially interesting for
+ store merging. */
+
+static bool
+lhs_code_valid_for_store_merging_p (tree lhs)
+{
+ tree_code code = TREE_CODE (lhs);
+ tree type = TREE_TYPE (lhs);
+
+ /* If the target requests a promotion to a wider type get_inner_reference may
+ give bad results for the store destination (for example __fp16 to float
+ promotion in aarch64). Be conservative here. */
+ tree prom_type = targetm.promoted_type (type);
+ if (prom_type && TYPE_PRECISION (type) != TYPE_PRECISION (prom_type))
+ return false;
+
+ if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
+ || code == COMPONENT_REF || code == BIT_FIELD_REF)
+ return true;
+
+ return false;
+}
+
+/* Return true if tree_code CODE is of a valid constant we want to consider
+ during store merging. In practice accept all codes that
+ native_encode_expr accepts. */
+
+static bool
+rhs_code_valid_for_store_merging_p (tree_code code)
+{
+ if (TREE_CODE_CLASS (code) != tcc_constant)
+ return false;
+
+ return code != VOID_CST;
+}
+
+/* Entry point for the pass. Go over each basic block recording chains of
+ immediate stores. Upon encountering a terminating statement (as defined
+ by stmt_terminates_chain_p) process the recorded stores and emit the widened
+ variants. */
+
+unsigned int
+pass_store_merging::execute (function *fun)
+{
+ basic_block bb;
+ hash_set orig_stmts;
+
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple_stmt_iterator gsi;
+ HOST_WIDE_INT num_statements = 0;
+ /* Record the original statements so that we can keep track of
+ statements emitted in this pass and not re-process new
+ statements. */
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple_set_visited (gsi_stmt (gsi), false);
+ num_statements++;
+ }
+
+ if (num_statements < 2)
+ continue;
+
+ if (dump_file)
+ fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
+
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (gimple_has_volatile_ops (stmt))
+ {
+ /* Terminate all chains. */
+ if (dump_file)
+ fprintf (dump_file, "Volatile access terminates "
+ "all chains\n");
+ terminate_and_process_all_chains ();
+ continue;
+ }
+
+ if (is_gimple_debug (stmt))
+ continue;
+
+ if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
+ && lhs_code_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
+ {
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ tree_code rhs_code = gimple_assign_rhs_code (stmt);
+
+ HOST_WIDE_INT bitsize, bitpos;
+ machine_mode mode;
+ int unsignedp = 0, reversep = 0, volatilep = 0;
+ tree offset, base_addr;
+ base_addr
+ = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+ gcc_assert (!volatilep);
+ bool invalid = offset || reversep || bitsize > MAX_STORE_BITSIZE
+ || !rhs_code_valid_for_store_merging_p (rhs_code);
+ struct imm_store_chain_info **chain_info
+ = m_stores.get (base_addr);
+
+ if (!invalid)
+ {
+ store_immediate_info *info;
+ if (chain_info)
+ {
+ info = new store_immediate_info (
+ bitsize, bitpos, rhs, lhs, stmt,
+ (*chain_info)->m_store_info.length ());
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "Recording immediate store from stmt:\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ }
+ (*chain_info)->m_store_info.safe_push (info);
+ continue;
+ }
+
+ /* Store aliases any existing chain? */
+ terminate_all_aliasing_chains (lhs, base_addr, stmt);
+ /* Start a new chain. */
+ struct imm_store_chain_info *new_chain
+ = new imm_store_chain_info;
+ info = new store_immediate_info (bitsize, bitpos, rhs, lhs,
+ stmt, 0);
+ new_chain->m_store_info.safe_push (info);
+ m_stores.put (base_addr, new_chain);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "Starting new chain with statement:\n");
+ print_gimple_stmt (dump_file, stmt, 0, 0);
+ }
+ }
+ else
+ terminate_all_aliasing_chains (lhs, base_addr, stmt);
+
+ continue;
+ }
+
+ terminate_all_aliasing_chains (NULL_TREE, NULL_TREE, stmt);
+ }
+ terminate_and_process_all_chains ();
+ }
+ return 0;
+}
+
+} // anon namespace
+
+/* Construct and return a store merging pass object. */
+
+gimple_opt_pass *
+make_pass_store_merging (gcc::context *ctxt)
+{
+ return new pass_store_merging (ctxt);
+}
diff --git a/gcc/opts.c b/gcc/opts.c
index bc0570dbd5ecf803737c32455060c51a16726c59..96d613c234cc251f19b0815c98a0e50136c08b21 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -463,6 +463,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_1_PLUS, OPT_ftree_dse, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_ter, NULL, 1 },
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 },
+ { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fstore_merging, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_fre, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_copy_prop, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_ftree_sink, NULL, 1 },
diff --git a/gcc/params.def b/gcc/params.def
index 8907aa4a0ffa5619b2b4022faa81db4fad89ce52..e63e5948089e1a6569cce386292d0d0890612256 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1100,6 +1100,12 @@ DEFPARAM (PARAM_MAX_TAIL_MERGE_COMPARISONS,
"Maximum amount of similar bbs to compare a bb with.",
10, 0, 0)
+DEFPARAM (PARAM_STORE_MERGING_ALLOW_UNALIGNED,
+ "store-merging-allow-unaligned",
+ "Allow the store merging pass to introduce unaligned stores "
+ "if it is legal to do so",
+ 1, 0, 1)
+
DEFPARAM (PARAM_MAX_TAIL_MERGE_ITERATIONS,
"max-tail-merge-iterations",
"Maximum amount of iterations of the pass over a function.",
diff --git a/gcc/passes.def b/gcc/passes.def
index b8d87c3fb0348742c270b935c7914e1832f1dcd9..d5530e3ba072fb5ed4a1d43309cc019273249673 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -325,6 +325,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_fold_builtins);
NEXT_PASS (pass_optimize_widening_mul);
+ NEXT_PASS (pass_store_merging);
NEXT_PASS (pass_tail_calls);
/* If DCE is not run before checking for uninitialized uses,
we may get false warnings (e.g., testsuite/gcc.dg/uninit-5.c).
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c b/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..7c888b469cf39f00ced8ddb8cc6dc245fa30b97b
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u;
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v;
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v;
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w;
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c b/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..cb9cc79026310260ffc3a83bfdf9bfc92f998a86
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u __attribute__((aligned));
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v __attribute__((aligned));
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v __attribute__((aligned));
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w __attribute__((aligned));
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/store_merging_1.c b/gcc/testsuite/gcc.dg/store_merging_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..09a4d1456c6db5c685cf872de688b4a5b9ef2c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store_merging_1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+struct bar {
+ int a;
+ char b;
+ char c;
+ char d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foo1 (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+}
+
+void
+foo2 (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 1;
+ p->d = 0;
+ p->e = 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
diff --git a/gcc/testsuite/gcc.dg/store_merging_2.c b/gcc/testsuite/gcc.dg/store_merging_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..d3acc2d330c303a1db22c787e1ba8c89f4a423cc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store_merging_2.c
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+struct bar
+{
+ int a;
+ unsigned char b;
+ unsigned char c;
+ short d;
+ unsigned char e;
+ unsigned char f;
+ unsigned char g;
+};
+
+__attribute__ ((noinline)) void
+foozero (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+ p->f = 0;
+ p->g = 0;
+}
+
+__attribute__ ((noinline)) void
+foo1 (struct bar *p)
+{
+ p->b = 1;
+ p->a = 2;
+ p->c = 3;
+ p->d = 4;
+ p->e = 5;
+ p->f = 0;
+ p->g = 0xff;
+}
+
+__attribute__ ((noinline)) void
+foo2 (struct bar *p, struct bar *p2)
+{
+ p->b = 0xff;
+ p2->b = 0xa;
+ p->a = 0xfffff;
+ p2->c = 0xc;
+ p->c = 0xff;
+ p2->d = 0xbf;
+ p->d = 0xfff;
+}
+
+int
+main (void)
+{
+ struct bar b1, b2;
+ foozero (&b1);
+ foozero (&b2);
+
+ foo1 (&b1);
+ if (b1.b != 1 || b1.a != 2 || b1.c != 3 || b1.d != 4 || b1.e != 5
+ || b1.f != 0 || b1.g != 0xff)
+ __builtin_abort ();
+
+ foozero (&b1);
+ /* Make sure writes to aliasing struct pointers preserve the
+ correct order. */
+ foo2 (&b1, &b1);
+ if (b1.b != 0xa || b1.a != 0xfffff || b1.c != 0xff || b1.d != 0xfff)
+ __builtin_abort ();
+
+ foozero (&b1);
+ foo2 (&b1, &b2);
+ if (b1.a != 0xfffff || b1.b != 0xff || b1.c != 0xff || b1.d != 0xfff
+ || b2.b != 0xa || b2.c != 0xc || b2.d != 0xbf)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
diff --git a/gcc/testsuite/gcc.dg/store_merging_3.c b/gcc/testsuite/gcc.dg/store_merging_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..cd756c1afaeb3338501971bc6a4b7c72f1a3c85d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store_merging_3.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+/* Make sure stores to volatile addresses don't get combined with
+ other accesses. */
+
+struct bar
+{
+ int a;
+ char b;
+ char c;
+ volatile short d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foozero (struct bar *p)
+{
+ p->b = 0xa;
+ p->a = 0xb;
+ p->c = 0xc;
+ p->d = 0;
+ p->e = 0xd;
+ p->f = 0xe;
+ p->g = 0xf;
+}
+
+/* { dg-final { scan-tree-dump "Volatile access terminates all chains" "store-merging" } } */
+/* { dg-final { scan-tree-dump-times "=\{v\} 0;" 1 "store-merging" } } */
diff --git a/gcc/testsuite/gcc.dg/store_merging_4.c b/gcc/testsuite/gcc.dg/store_merging_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..4bf9025af6e9bd852a178057b1ed80d3bd86fc57
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store_merging_4.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+/* Check that we can merge interleaving stores that are guaranteed
+ to be non-aliasing. */
+
+struct bar
+{
+ int a;
+ char b;
+ char c;
+ short d;
+ char e;
+ char f;
+ char g;
+};
+
+void
+foozero (struct bar *restrict p, struct bar *restrict p2)
+{
+ p->b = 0xff;
+ p2->b = 0xa;
+ p->a = 0xfffff;
+ p2->a = 0xab;
+ p2->c = 0xc;
+ p->c = 0xff;
+ p2->d = 0xbf;
+ p->d = 0xfff;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
diff --git a/gcc/testsuite/gcc.dg/store_merging_5.c b/gcc/testsuite/gcc.dg/store_merging_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..3b824204e74266d447fd14b9f83bd3bb1dd25915
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store_merging_5.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+/* Make sure that non-aliasing non-constant interspersed stores do not
+ stop chains. */
+
+struct bar {
+ int a;
+ char b;
+ char c;
+ char d;
+ char e;
+ char g;
+};
+
+void
+foo1 (struct bar *p, char tmp)
+{
+ p->a = 0;
+ p->b = 0;
+ p->g = tmp;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" } } */
+/* { dg-final { scan-tree-dump-times "MEM\\\[.*\\\]" 1 "store-merging" } } */
diff --git a/gcc/testsuite/gcc.dg/store_merging_6.c b/gcc/testsuite/gcc.dg/store_merging_6.c
new file mode 100644
index 0000000000000000000000000000000000000000..7d89baf91b826e82c979092e4250fed650340d4d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store_merging_6.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target non_strict_align } */
+/* { dg-options "-O -fdump-tree-store-merging" } */
+
+/* Check that we can widen accesses to bitfields. */
+
+struct bar {
+ int a : 3;
+ unsigned char b : 4;
+ unsigned char c : 1;
+ char d;
+ char e;
+ char f;
+ char g;
+};
+
+__attribute__ ((noinline)) void
+foozero (struct bar *p)
+{
+ p->b = 0;
+ p->a = 0;
+ p->c = 0;
+ p->d = 0;
+ p->e = 0;
+ p->f = 0;
+ p->g = 0;
+}
+
+__attribute__ ((noinline)) void
+foo1 (struct bar *p)
+{
+ p->b = 3;
+ p->a = 2;
+ p->c = 1;
+ p->d = 4;
+ p->e = 5;
+}
+
+int
+main (void)
+{
+ struct bar p;
+ foozero (&p);
+ foo1 (&p);
+ if (p.a != 2 || p.b != 3 || p.c != 1 || p.d != 4 || p.e != 5
+ || p.f != 0 || p.g != 0)
+ __builtin_abort ();
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c
index f02e55f1cc2f01063aea35b3b88f793bb2f7c532..9de4e771ab1e73bce960d4038f8ec5b49b5c612c 100644
--- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c
@@ -3,22 +3,22 @@
int arr[4][4];
void
-foo ()
+foo (int x, int y)
{
- arr[0][1] = 1;
- arr[1][0] = -1;
- arr[2][0] = 1;
- arr[1][1] = -1;
- arr[0][2] = 1;
- arr[0][3] = -1;
- arr[1][2] = 1;
- arr[2][1] = -1;
- arr[3][0] = 1;
- arr[3][1] = -1;
- arr[2][2] = 1;
- arr[1][3] = -1;
- arr[2][3] = 1;
- arr[3][2] = -1;
+ arr[0][1] = x;
+ arr[1][0] = y;
+ arr[2][0] = x;
+ arr[1][1] = y;
+ arr[0][2] = x;
+ arr[0][3] = y;
+ arr[1][2] = x;
+ arr[2][1] = y;
+ arr[3][0] = x;
+ arr[3][1] = y;
+ arr[2][2] = x;
+ arr[1][3] = y;
+ arr[2][3] = x;
+ arr[3][2] = y;
}
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 7 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c
index 40056b1adebd3fe3e473e378e123ee62041da9a2..824f0d2e81bc250f40ffb71b3e39cde76c9ff28d 100644
--- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c
@@ -3,22 +3,22 @@
float arr[4][4];
void
-foo ()
+foo (float x, float y)
{
- arr[0][1] = 1;
- arr[1][0] = -1;
- arr[2][0] = 1;
- arr[1][1] = -1;
- arr[0][2] = 1;
- arr[0][3] = -1;
- arr[1][2] = 1;
- arr[2][1] = -1;
- arr[3][0] = 1;
- arr[3][1] = -1;
- arr[2][2] = 1;
- arr[1][3] = -1;
- arr[2][3] = 1;
- arr[3][2] = -1;
+ arr[0][1] = x;
+ arr[1][0] = y;
+ arr[2][0] = x;
+ arr[1][1] = y;
+ arr[0][2] = x;
+ arr[0][3] = y;
+ arr[1][2] = x;
+ arr[2][1] = y;
+ arr[3][0] = x;
+ arr[3][1] = y;
+ arr[2][2] = x;
+ arr[1][3] = y;
+ arr[2][3] = x;
+ arr[3][2] = y;
}
/* { dg-final { scan-assembler-times "stp\ts\[0-9\]+, s\[0-9\]" 7 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr22141.c b/gcc/testsuite/gcc.target/i386/pr22141.c
new file mode 100644
index 0000000000000000000000000000000000000000..efded365778417e7b4bf468288f42f5136fdd585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr22141.c
@@ -0,0 +1,126 @@
+/* PR middle-end/22141 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u;
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v;
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v;
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w;
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr34012.c b/gcc/testsuite/gcc.target/i386/pr34012.c
index 00b1240d1b9fb96fa558459bd4f357ad13a3881d..d0cffa052905a1c11e0927e92ab295d71517f746 100644
--- a/gcc/testsuite/gcc.target/i386/pr34012.c
+++ b/gcc/testsuite/gcc.target/i386/pr34012.c
@@ -1,7 +1,7 @@
/* PR rtl-optimization/34012 */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fno-store-merging" } */
void bar (long int *);
void
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 105200b403403b25f0382b3a7ba5d5a28ca86ae1..3cadfb078b928ca4198db7e6a10886b223966044 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -424,6 +424,7 @@ extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_return (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_function_noreturn (gcc::context *ctxt);