public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-6434] Recognize MULT_HIGHPART_EXPR in tree-ssa-math-opts pass.
@ 2022-01-11 12:31 Roger Sayle
  0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2022-01-11 12:31 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0752c75536e92212ac71c4a44bbc7a18bd7e0315

commit r12-6434-g0752c75536e92212ac71c4a44bbc7a18bd7e0315
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Tue Jan 11 12:30:44 2022 +0000

    Recognize MULT_HIGHPART_EXPR in tree-ssa-math-opts pass.
    
    This is the third iteration of a patch to recognize MULT_HIGHPART_EXPR
    in the middle-end.  As they say "the third time's a charm".  The first
    version implemented this in match.pd, which was considered too early.
    https://gcc.gnu.org/pipermail/gcc-patches/2020-August/551316.html
    The second version attempted to do this during RTL expansion, and was
    considered to be too late in the middle-end.
    https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576922.html
    https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576923.html
    
    This latest version incorporates Richard Biener's feedback/suggestion
    to recognize MULT_HIGHPART_EXPR in one of the "instruction selection
    passes", specifically tree-ssa-math-opts, where the recognition of
    highpart multiplications takes place in the same pass as widening
    multiplications.
    
    With each rewrite, the patch is also getting more aggressive in the
    set of widening multiplications that it recognizes as highpart multiplies.
    Currently any widening multiplication followed by a right shift (either
    signed or unsigned) by a bit count sufficient to eliminate the lowpart
    is recognized.  The result of this shift doesn't need to be truncated.
    As previously, this patch confirms the target provides a suitable
    optab before introducing the MULT_HIGHPART_EXPR.  This is the reason
    the testcase is restricted to x86_64, as this pass doesn't do anything
    on some platforms, but x86_64 should be sufficient to confirm that the
    pass is working/continues to work.
    
    2022-01-11  Roger Sayle  <roger@nextmovesoftware.com>
                Richard Biener  <rguenther@suse.de>
    
    gcc/ChangeLog
            * tree-ssa-math-opts.c (struct widen_mul_stats): Add a
            highpart_mults_inserted field.
            (convert_mult_to_highpart): New function to convert right shift
            of a widening multiply into a MULT_HIGHPART_EXPR.
            (math_opts_dom_walker::after_dom_children) [RSHIFT_EXPR]:
            Call new convert_mult_to_highpart function.
            (pass_optimize_widening_mul::execute): Add a statistics counter
            for tracking "highpart multiplications inserted" events.
    
    gcc/testsuite/ChangeLog
            * gcc.target/i386/mult-highpart.c: New test case.

Diff:
---
 gcc/testsuite/gcc.target/i386/mult-highpart.c | 167 ++++++++++++++++++++++++++
 gcc/tree-ssa-math-opts.c                      |  98 ++++++++++++++-
 2 files changed, 264 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/mult-highpart.c b/gcc/testsuite/gcc.target/i386/mult-highpart.c
new file mode 100644
index 00000000000..efde31169ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mult-highpart.c
@@ -0,0 +1,167 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -Wno-long-long -fdump-tree-optimized" } */
+
+typedef unsigned int __attribute ((mode(TI))) uti_t;
+typedef int __attribute ((mode(TI))) ti_t;
+
+long long stest1(long long x, long long y)
+{
+  return ((ti_t)x * (ti_t)y) >> 64;
+}
+
+long long stest2(long long x)
+{
+  return ((ti_t)x * 19065) >> 64;
+}
+
+long long stest3(long long x, long long y)
+{
+  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
+}
+
+long long stest4(long long x)
+{
+  return (uti_t)((ti_t)x * 19065) >> 64;
+}
+
+ti_t stest5(long long x, long long y)
+{
+  return ((ti_t)x * (ti_t)y) >> 64;
+}
+
+ti_t stest6(long long x)
+{
+  return ((ti_t)x * 19065) >> 64;
+}
+
+uti_t stest7(long long x, long long y)
+{
+  return (uti_t)((ti_t)x * (ti_t)y) >>64;
+}
+
+uti_t stest8(long long x)
+{
+  return (uti_t)((ti_t)x * 19065) >> 64;
+}
+
+long long stest9(long long x, long long y)
+{
+  return ((ti_t)x * (ti_t)y) >> 72;
+}
+
+long long stest10(long long x)
+{
+  return ((ti_t)x * 19065) >> 72;
+}
+
+long long stest11(long long x, long long y)
+{
+  return (uti_t)((ti_t)x * (ti_t)y) >> 72;
+}
+
+long long stest12(long long x)
+{
+  return (uti_t)((ti_t)x * 19065) >> 72;
+}
+
+ti_t stest13(long long x, long long y)
+{
+  return ((ti_t)x * (ti_t)y) >> 72;
+}
+
+ti_t stest14(long long x)
+{
+  return ((ti_t)x * 19065) >> 72;
+}
+
+uti_t stest15(long long x, long long y)
+{
+  return (uti_t)((ti_t)x * (ti_t)y) >> 72;
+}
+
+uti_t stest16(long long x)
+{
+  return (uti_t)((ti_t)x * 19065) >> 72;
+}
+
+unsigned long long utest1(unsigned long long x, unsigned long long y)
+{
+  return ((uti_t)x * (uti_t)y) >> 64;
+}
+
+unsigned long long utest2(unsigned long long x)
+{
+  return ((uti_t)x * 19065) >> 64;
+}
+
+unsigned long long utest3(unsigned long long x, unsigned long long y)
+{
+  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
+}
+
+unsigned long long utest4(unsigned long long x)
+{
+  return (ti_t)((uti_t)x * 19065) >> 64;
+}
+
+uti_t utest5(unsigned long long x, unsigned long long y)
+{
+  return ((uti_t)x * (uti_t)y) >> 64;
+}
+
+uti_t utest6(unsigned long long x)
+{
+  return ((uti_t)x * 19065) >> 64;
+}
+
+ti_t utest7(unsigned long long x, unsigned long long y)
+{
+  return (ti_t)((uti_t)x * (uti_t)y) >>64;
+}
+
+ti_t utest8(long long x)
+{
+  return (uti_t)((ti_t)x * 19065) >> 64;
+}
+
+unsigned long long utest9(unsigned long long x, unsigned long long y)
+{
+  return ((uti_t)x * (uti_t)y) >> 72;
+}
+
+unsigned long long utest10(unsigned long long x)
+{
+  return ((uti_t)x * 19065) >> 72;
+}
+
+unsigned long long utest11(unsigned long long x, unsigned long long y)
+{
+  return (ti_t)((uti_t)x * (uti_t)y) >> 72;
+}
+
+unsigned long long utest12(unsigned long long x)
+{
+  return (ti_t)((uti_t)x * 19065) >> 72;
+}
+
+uti_t utest13(unsigned long long x, unsigned long long y)
+{
+  return ((uti_t)x * (uti_t)y) >> 72;
+}
+
+uti_t utest14(unsigned long long x)
+{
+  return ((uti_t)x * 19065) >> 72;
+}
+
+ti_t utest15(unsigned long long x, unsigned long long y)
+{
+  return (ti_t)((uti_t)x * (uti_t)y) >> 72;
+}
+
+ti_t utest16(unsigned long long x)
+{
+  return (ti_t)((uti_t)x * 19065) >> 72;
+}
+
+/* { dg-final { scan-tree-dump-times " h\\* " 32 "optimized" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index c4564f9fceb..1b6a57b8a77 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -207,6 +207,9 @@ static struct
 
   /* Number of divmod calls inserted.  */
   int divmod_calls_inserted;
+
+  /* Number of highpart multiplication ops inserted.  */
+  int highpart_mults_inserted;
 } widen_mul_stats;
 
 /* The instance of "struct occurrence" representing the highest
@@ -4548,9 +4551,96 @@ convert_to_divmod (gassign *stmt)
   return true; 
 }    
 
+/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
+   its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
+   value is true iff we converted the statement.  */
+
+static bool
+convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
+{
+  tree lhs = gimple_assign_lhs (stmt);
+  tree stype = TREE_TYPE (lhs);
+  tree sarg0 = gimple_assign_rhs1 (stmt);
+  tree sarg1 = gimple_assign_rhs2 (stmt);
+
+  if (TREE_CODE (stype) != INTEGER_TYPE
+      || TREE_CODE (sarg1) != INTEGER_CST
+      || TREE_CODE (sarg0) != SSA_NAME
+      || !tree_fits_uhwi_p (sarg1)
+      || !has_single_use (sarg0))
+    return false;
+
+  gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
+  if (!def)
+    return false;
+
+  enum tree_code mcode = gimple_assign_rhs_code (def);
+  if (mcode == NOP_EXPR)
+    {
+      tree tmp = gimple_assign_rhs1 (def);
+      if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
+	return false;
+      def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
+      if (!def)
+	return false;
+      mcode = gimple_assign_rhs_code (def);
+    }
+
+  if (mcode != WIDEN_MULT_EXPR
+      || gimple_bb (def) != gimple_bb (stmt))
+    return false;
+  tree mtype = TREE_TYPE (gimple_assign_lhs (def));
+  if (TREE_CODE (mtype) != INTEGER_TYPE
+      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
+    return false;
+
+  tree mop1 = gimple_assign_rhs1 (def);
+  tree mop2 = gimple_assign_rhs2 (def);
+  tree optype = TREE_TYPE (mop1);
+  bool unsignedp = TYPE_UNSIGNED (optype);
+  unsigned int prec = TYPE_PRECISION (optype);
+
+  if (unsignedp != TYPE_UNSIGNED (mtype)
+      || TYPE_PRECISION (mtype) != 2 * prec)
+    return false;
+
+  unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
+  if (bits < prec || bits >= 2 * prec)
+    return false;
+
+  machine_mode mode = TYPE_MODE (optype);
+  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+  if (optab_handler (tab, mode) == CODE_FOR_nothing)
+    return false;
+
+  location_t loc = gimple_location (stmt);
+  tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
+					   MULT_HIGHPART_EXPR, mop1, mop2);
+  tree highpart2 = highpart1;
+  tree ntype = optype;
+
+  if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
+    {
+      ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
+				    : signed_type_for (optype);
+      highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
+    }
+  if (bits > prec)
+    highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
+					RSHIFT_EXPR, highpart2, 
+					build_int_cst (ntype, bits - prec));
+
+  gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
+  gsi_replace (gsi, new_stmt, true);
+
+  widen_mul_stats.highpart_mults_inserted++;
+  return true;
+}
+
+
 /* Find integer multiplications where the operands are extended from
    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
-   where appropriate.  */
+   or MULT_HIGHPART_EXPR where appropriate.  */
 
 namespace {
 
@@ -4656,6 +4746,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 	      convert_to_divmod (as_a<gassign *> (stmt));
 	      break;
 
+	    case RSHIFT_EXPR:
+	      convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
+	      break;
+
 	    default:;
 	    }
 	}
@@ -4738,6 +4832,8 @@ pass_optimize_widening_mul::execute (function *fun)
 			    widen_mul_stats.fmas_inserted);
   statistics_counter_event (fun, "divmod calls inserted",
 			    widen_mul_stats.divmod_calls_inserted);
+  statistics_counter_event (fun, "highpart multiplications inserted",
+			    widen_mul_stats.highpart_mults_inserted);
 
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-01-11 12:31 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-11 12:31 [gcc r12-6434] Recognize MULT_HIGHPART_EXPR in tree-ssa-math-opts pass Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).