public inbox for gcc-cvs@sourceware.org
* [gcc r11-8833] aarch64: Add a simple fixed-point class for costing
@ 2021-08-06 14:37 Richard Sandiford
From: Richard Sandiford @ 2021-08-06 14:37 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d0d9602e3cd87ceffbbd3f57578d958e52ac931f

commit r11-8833-gd0d9602e3cd87ceffbbd3f57578d958e52ac931f
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Fri Aug 6 15:36:53 2021 +0100

    aarch64: Add a simple fixed-point class for costing
    
    This patch adds a simple fixed-point class for holding fractional
    cost values.  It can exactly represent the reciprocal of any
    single-vector SVE element count (including the non-power-of-2 ones).
    This means that it can also hold 1/N for all N in [1, 16], which should
    be enough for the various *_per_cycle fields.
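    
    For example, the new header defines SCALE = 11531520, which is
    lcm (1, ..., 16) * 16, so each such reciprocal is stored exactly as
    an integer numerator over SCALE:
    
      1/3  = 3843840 / 11531520
      1/7  = 1647360 / 11531520
      1/11 = 1048320 / 11531520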
    
    For now the assumption is that the number of possible reciprocals
    is fixed at compile time and so the class should always be able
    to hold an exact value.
    
    The class uses a uint64_t to hold the fixed-point value, which means
    that it can hold any scaled uint32_t cost.  Normally we don't worry
    about overflow when manipulating raw uint32_t costs, but just to be
    on the safe side, the class uses saturating arithmetic for all
    operations.
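    
    For reference, the headroom is large: the largest scaled value,
    0xffffffff * 11531520, is roughly 5 * 10^16, comfortably below the
    uint64_t limit of about 1.8 * 10^19, so saturation should only kick
    in for intermediate results far outside the useful cost range.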
    
    As far as the changes to the cost routines themselves go:
    
    - The changes to aarch64_add_stmt_cost and its subroutines are
      just laying groundwork for future patches; no functional change
      intended.
    
    - The changes to aarch64_adjust_body_cost mean that we now
      take fractional differences into account.
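    
      For example (with made-up numbers): if the scalar code needs
      2 cycles per iteration and the vector code needs 5 general ops
      at 2 ops per cycle, the vector estimate is now 2.5 cycles rather
      than the old CEIL (5, 2) = 3, so a body cost of 100 becomes
      ceil (100 * 2.5 / 2) = 125 instead of CEIL (100 * 3, 2) = 150.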
    
    gcc/
            * config/aarch64/fractional-cost.h: New file.
            * config/aarch64/aarch64.c: Include <algorithm> (indirectly)
            and fractional-cost.h.
            (aarch64_detect_scalar_stmt_subtype): Use fractional_cost
            for statement costs.
            (aarch64_detect_vector_stmt_subtype): Likewise.
            (aarch64_sve_adjust_stmt_cost, aarch64_adjust_stmt_cost): Likewise.
            (aarch64_add_stmt_cost): Likewise.
            (aarch64_estimate_min_cycles_per_iter): Use fractional_cost
            for cycle counts.
            (aarch64_adjust_body_cost): Likewise.
            (aarch64_test_fractional_cost): New function.
            (aarch64_run_selftests): Call it.
    
    (cherry picked from commit 83d796d3e58badcb864d179b882979f714ffd162)

Diff:
---
 gcc/config/aarch64/aarch64.c         | 179 ++++++++++++++++++++------
 gcc/config/aarch64/fractional-cost.h | 236 +++++++++++++++++++++++++++++++++++
 2 files changed, 377 insertions(+), 38 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0239d10e19c..042a4daa22c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -20,8 +20,9 @@
 
 #define IN_TARGET_CODE 1
 
-#include "config.h"
 #define INCLUDE_STRING
+#define INCLUDE_ALGORITHM
+#include "config.h"
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
@@ -76,6 +77,7 @@
 #include "function-abi.h"
 #include "gimple-pretty-print.h"
 #include "tree-ssa-loop-niter.h"
+#include "fractional-cost.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -14914,10 +14916,10 @@ aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
    for STMT_INFO, which has cost kind KIND.  If this is a scalar operation,
    try to subdivide the target-independent categorization provided by KIND
    to get a more accurate cost.  */
-static unsigned int
+static fractional_cost
 aarch64_detect_scalar_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
 				    stmt_vec_info stmt_info,
-				    unsigned int stmt_cost)
+				    fractional_cost stmt_cost)
 {
   /* Detect an extension of a loaded value.  In general, we'll be able to fuse
      the extension with the load.  */
@@ -14933,11 +14935,11 @@ aarch64_detect_scalar_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
    the target-independent categorization provided by KIND to get a more
    accurate cost.  WHERE specifies where the cost associated with KIND
    occurs.  */
-static unsigned int
+static fractional_cost
 aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
 				    stmt_vec_info stmt_info, tree vectype,
 				    enum vect_cost_model_location where,
-				    unsigned int stmt_cost)
+				    fractional_cost stmt_cost)
 {
   const simd_vec_cost *simd_costs = aarch64_simd_vec_costs (vectype);
   const sve_vec_cost *sve_costs = nullptr;
@@ -15018,10 +15020,10 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
    for STMT_INFO, which has cost kind KIND and which when vectorized would
    operate on vector type VECTYPE.  Adjust the cost as necessary for SVE
    targets.  */
-static unsigned int
+static fractional_cost
 aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind,
 			      stmt_vec_info stmt_info, tree vectype,
-			      unsigned int stmt_cost)
+			      fractional_cost stmt_cost)
 {
   /* Unlike vec_promote_demote, vector_stmt conversions do not change the
      vector register size or number of units.  Integer promotions of this
@@ -15085,9 +15087,9 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind,
 /* STMT_COST is the cost calculated for STMT_INFO, which has cost kind KIND
    and which when vectorized would operate on vector type VECTYPE.  Add the
    cost of any embedded operations.  */
-static unsigned int
+static fractional_cost
 aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-			  tree vectype, unsigned int stmt_cost)
+			  tree vectype, fractional_cost stmt_cost)
 {
   if (vectype)
     {
@@ -15341,7 +15343,7 @@ aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 
   if (flag_vect_cost_model)
     {
-      int stmt_cost
+      fractional_cost stmt_cost
 	= aarch64_builtin_vectorization_cost (kind, vectype, misalign);
 
       /* Do one-time initialization based on the vinfo.  */
@@ -15439,7 +15441,7 @@ aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count,
 	  && stmt_in_inner_loop_p (vinfo, stmt_info))
 	count *= 50; /*  FIXME  */
 
-      retval = (unsigned) (count * stmt_cost);
+      retval = (count * stmt_cost).ceil ();
       costs->region[where] += retval;
     }
 
@@ -15471,17 +15473,17 @@ aarch64_sve_op_count::dump () const
 
 /* Use ISSUE_INFO to estimate the minimum number of cycles needed to issue
    the operations described by OPS.  This is a very simplistic model!  */
-static unsigned int
+static fractional_cost
 aarch64_estimate_min_cycles_per_iter
   (const aarch64_vec_op_count *ops,
    const aarch64_base_vec_issue_info *issue_info)
 {
-  unsigned int cycles = MAX (ops->reduction_latency, 1);
-  cycles = MAX (cycles, CEIL (ops->stores, issue_info->stores_per_cycle));
-  cycles = MAX (cycles, CEIL (ops->loads + ops->stores,
-			      issue_info->loads_stores_per_cycle));
-  cycles = MAX (cycles, CEIL (ops->general_ops,
-			      issue_info->general_ops_per_cycle));
+  fractional_cost cycles = MAX (ops->reduction_latency, 1);
+  cycles = std::max (cycles, { ops->stores, issue_info->stores_per_cycle });
+  cycles = std::max (cycles, { ops->loads + ops->stores,
+			       issue_info->loads_stores_per_cycle });
+  cycles = std::max (cycles, { ops->general_ops,
+			       issue_info->general_ops_per_cycle });
   return cycles;
 }
 
@@ -15535,12 +15537,14 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
   if (!issue_info)
     return body_cost;
 
-  unsigned int scalar_cycles_per_iter
+  fractional_cost scalar_cycles_per_iter
     = aarch64_estimate_min_cycles_per_iter (&costs->scalar_ops,
 					    issue_info->scalar);
-  unsigned int advsimd_cycles_per_iter
+
+  fractional_cost advsimd_cycles_per_iter
     = aarch64_estimate_min_cycles_per_iter (&costs->advsimd_ops,
 					    issue_info->advsimd);
+
   bool could_use_advsimd
     = ((costs->vec_flags & VEC_ADVSIMD)
        || (aarch64_autovec_preference != 2
@@ -15557,36 +15561,37 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
       dump_printf_loc (MSG_NOTE, vect_location, "Scalar issue estimate:\n");
       costs->scalar_ops.dump ();
       dump_printf_loc (MSG_NOTE, vect_location,
-		       "  estimated cycles per iteration = %d\n",
-		       scalar_cycles_per_iter);
+		       "  estimated cycles per iteration = %f\n",
+		       scalar_cycles_per_iter.as_double ());
       if (could_use_advsimd)
 	{
 	  dump_printf_loc (MSG_NOTE, vect_location,
 			   "Advanced SIMD issue estimate:\n");
 	  costs->advsimd_ops.dump ();
 	  dump_printf_loc (MSG_NOTE, vect_location,
-			   "  estimated cycles per iteration = %d\n",
-			   advsimd_cycles_per_iter);
+			   "  estimated cycles per iteration = %f\n",
+			   advsimd_cycles_per_iter.as_double ());
 	}
       else
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "Loop could not use Advanced SIMD\n");
     }
 
-  uint64_t vector_cycles_per_iter = advsimd_cycles_per_iter;
+  fractional_cost vector_cycles_per_iter = advsimd_cycles_per_iter;
   unsigned int vector_reduction_latency = costs->advsimd_ops.reduction_latency;
+
   if ((costs->vec_flags & VEC_ANY_SVE) && issue_info->sve)
     {
       /* Estimate the minimum number of cycles per iteration needed to issue
 	 non-predicate operations.  */
-      unsigned int sve_cycles_per_iter
+      fractional_cost sve_cycles_per_iter
 	= aarch64_estimate_min_cycles_per_iter (&costs->sve_ops,
 						issue_info->sve);
 
       /* Separately estimate the minimum number of cycles per iteration needed
 	 to issue the predicate operations.  */
-      unsigned int pred_cycles_per_iter
-	= CEIL (costs->sve_ops.pred_ops, issue_info->sve->pred_ops_per_cycle);
+      fractional_cost pred_cycles_per_iter
+	= { costs->sve_ops.pred_ops, issue_info->sve->pred_ops_per_cycle };
 
       if (dump_enabled_p ())
 	{
@@ -15594,14 +15599,16 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	  costs->sve_ops.dump ();
 	  dump_printf_loc (MSG_NOTE, vect_location,
 			   "  estimated cycles per iteration for non-predicate"
-			   " operations = %d\n", sve_cycles_per_iter);
+			   " operations = %f\n",
+			   sve_cycles_per_iter.as_double ());
 	  if (costs->sve_ops.pred_ops)
 	    dump_printf_loc (MSG_NOTE, vect_location, "  estimated cycles per"
 			     " iteration for predicate operations = %d\n",
-			     pred_cycles_per_iter);
+			     pred_cycles_per_iter.as_double ());
 	}
 
-      vector_cycles_per_iter = MAX (sve_cycles_per_iter, pred_cycles_per_iter);
+      vector_cycles_per_iter = std::max (sve_cycles_per_iter,
+					 pred_cycles_per_iter);
       vector_reduction_latency = costs->sve_ops.reduction_latency;
 
       /* If the scalar version of the loop could issue at least as
@@ -15615,7 +15622,7 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	 too drastic for scalar_cycles_per_iter vs. sve_cycles_per_iter;
 	 code later in the function handles that case in a more
 	 conservative way.  */
-      uint64_t sve_estimate = pred_cycles_per_iter + 1;
+      fractional_cost sve_estimate = pred_cycles_per_iter + 1;
       if (scalar_cycles_per_iter < sve_estimate)
 	{
 	  unsigned int min_cost
@@ -15655,8 +15662,10 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
       if (could_use_advsimd && advsimd_cycles_per_iter < sve_estimate)
 	{
 	  /* This ensures that min_cost > orig_body_cost * 2.  */
-	  unsigned int min_cost
-	    = orig_body_cost * CEIL (sve_estimate, advsimd_cycles_per_iter) + 1;
+	  unsigned int factor
+	    = fractional_cost::scale (1, sve_estimate,
+				      advsimd_cycles_per_iter);
+	  unsigned int min_cost = orig_body_cost * factor + 1;
 	  if (body_cost < min_cost)
 	    {
 	      if (dump_enabled_p ())
@@ -15689,8 +15698,8 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
      so minor differences should only result in minor changes.  */
   else if (scalar_cycles_per_iter < vector_cycles_per_iter)
     {
-      body_cost = CEIL (body_cost * vector_cycles_per_iter,
-			scalar_cycles_per_iter);
+      body_cost = fractional_cost::scale (body_cost, vector_cycles_per_iter,
+					  scalar_cycles_per_iter);
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "Increasing body cost to %d because scalar code"
@@ -15715,8 +15724,8 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, unsigned int body_cost)
 	   && scalar_cycles_per_iter > vector_cycles_per_iter
 	   && !should_disparage)
     {
-      body_cost = CEIL (body_cost * vector_cycles_per_iter,
-			scalar_cycles_per_iter);
+      body_cost = fractional_cost::scale (body_cost, vector_cycles_per_iter,
+					  scalar_cycles_per_iter);
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "Decreasing body cost to %d account for smaller"
@@ -25584,12 +25593,106 @@ aarch64_test_loading_full_dump ()
   ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
 }
 
+/* Test the fractional_cost class.  */
+
+static void
+aarch64_test_fractional_cost ()
+{
+  using cf = fractional_cost;
+
+  ASSERT_EQ (cf (0, 20), 0);
+
+  ASSERT_EQ (cf (4, 2), 2);
+  ASSERT_EQ (3, cf (9, 3));
+
+  ASSERT_NE (cf (5, 2), 2);
+  ASSERT_NE (3, cf (8, 3));
+
+  ASSERT_EQ (cf (7, 11) + cf (15, 11), 2);
+  ASSERT_EQ (cf (2, 3) + cf (3, 5), cf (19, 15));
+  ASSERT_EQ (cf (2, 3) + cf (1, 6) + cf (1, 6), 1);
+
+  ASSERT_EQ (cf (14, 15) - cf (4, 15), cf (2, 3));
+  ASSERT_EQ (cf (1, 4) - cf (1, 2), 0);
+  ASSERT_EQ (cf (3, 5) - cf (1, 10), cf (1, 2));
+  ASSERT_EQ (cf (11, 3) - 3, cf (2, 3));
+  ASSERT_EQ (3 - cf (7, 3), cf (2, 3));
+  ASSERT_EQ (3 - cf (10, 3), 0);
+
+  ASSERT_EQ (cf (2, 3) * 5, cf (10, 3));
+  ASSERT_EQ (14 * cf (11, 21), cf (22, 3));
+
+  ASSERT_TRUE (cf (4, 15) < cf (5, 15));
+  ASSERT_FALSE (cf (5, 15) < cf (5, 15));
+  ASSERT_FALSE (cf (6, 15) < cf (5, 15));
+  ASSERT_TRUE (cf (1, 3) < cf (2, 5));
+  ASSERT_TRUE (cf (1, 12) < cf (1, 6));
+  ASSERT_FALSE (cf (5, 3) < cf (5, 3));
+  ASSERT_TRUE (cf (239, 240) < 1);
+  ASSERT_FALSE (cf (240, 240) < 1);
+  ASSERT_FALSE (cf (241, 240) < 1);
+  ASSERT_FALSE (2 < cf (207, 104));
+  ASSERT_FALSE (2 < cf (208, 104));
+  ASSERT_TRUE (2 < cf (209, 104));
+
+  ASSERT_TRUE (cf (4, 15) <= cf (5, 15));
+  ASSERT_TRUE (cf (5, 15) <= cf (5, 15));
+  ASSERT_FALSE (cf (6, 15) <= cf (5, 15));
+  ASSERT_TRUE (cf (1, 3) <= cf (2, 5));
+  ASSERT_TRUE (cf (1, 12) <= cf (1, 6));
+  ASSERT_TRUE (cf (5, 3) <= cf (5, 3));
+  ASSERT_TRUE (cf (239, 240) <= 1);
+  ASSERT_TRUE (cf (240, 240) <= 1);
+  ASSERT_FALSE (cf (241, 240) <= 1);
+  ASSERT_FALSE (2 <= cf (207, 104));
+  ASSERT_TRUE (2 <= cf (208, 104));
+  ASSERT_TRUE (2 <= cf (209, 104));
+
+  ASSERT_FALSE (cf (4, 15) >= cf (5, 15));
+  ASSERT_TRUE (cf (5, 15) >= cf (5, 15));
+  ASSERT_TRUE (cf (6, 15) >= cf (5, 15));
+  ASSERT_FALSE (cf (1, 3) >= cf (2, 5));
+  ASSERT_FALSE (cf (1, 12) >= cf (1, 6));
+  ASSERT_TRUE (cf (5, 3) >= cf (5, 3));
+  ASSERT_FALSE (cf (239, 240) >= 1);
+  ASSERT_TRUE (cf (240, 240) >= 1);
+  ASSERT_TRUE (cf (241, 240) >= 1);
+  ASSERT_TRUE (2 >= cf (207, 104));
+  ASSERT_TRUE (2 >= cf (208, 104));
+  ASSERT_FALSE (2 >= cf (209, 104));
+
+  ASSERT_FALSE (cf (4, 15) > cf (5, 15));
+  ASSERT_FALSE (cf (5, 15) > cf (5, 15));
+  ASSERT_TRUE (cf (6, 15) > cf (5, 15));
+  ASSERT_FALSE (cf (1, 3) > cf (2, 5));
+  ASSERT_FALSE (cf (1, 12) > cf (1, 6));
+  ASSERT_FALSE (cf (5, 3) > cf (5, 3));
+  ASSERT_FALSE (cf (239, 240) > 1);
+  ASSERT_FALSE (cf (240, 240) > 1);
+  ASSERT_TRUE (cf (241, 240) > 1);
+  ASSERT_TRUE (2 > cf (207, 104));
+  ASSERT_FALSE (2 > cf (208, 104));
+  ASSERT_FALSE (2 > cf (209, 104));
+
+  ASSERT_EQ (cf (1, 2).ceil (), 1);
+  ASSERT_EQ (cf (11, 7).ceil (), 2);
+  ASSERT_EQ (cf (20, 1).ceil (), 20);
+  ASSERT_EQ ((cf (0xfffffffd) + 1).ceil (), 0xfffffffe);
+  ASSERT_EQ ((cf (0xfffffffd) + 2).ceil (), 0xffffffff);
+  ASSERT_EQ ((cf (0xfffffffd) + 3).ceil (), 0xffffffff);
+  ASSERT_EQ ((cf (0x7fffffff) * 2).ceil (), 0xfffffffe);
+  ASSERT_EQ ((cf (0x80000000) * 2).ceil (), 0xffffffff);
+
+  ASSERT_EQ (cf (1, 2).as_double (), 0.5);
+}
+
 /* Run all target-specific selftests.  */
 
 static void
 aarch64_run_selftests (void)
 {
   aarch64_test_loading_full_dump ();
+  aarch64_test_fractional_cost ();
 }
 
 } // namespace selftest
diff --git a/gcc/config/aarch64/fractional-cost.h b/gcc/config/aarch64/fractional-cost.h
new file mode 100644
index 00000000000..6a01634905b
--- /dev/null
+++ b/gcc/config/aarch64/fractional-cost.h
@@ -0,0 +1,236 @@
+// Simple fixed-point representation of fractional costs
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of GCC.
+//
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+//
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// A simple saturating fixed-point type for representing fractional
+// intermediate results in cost calculations.  The input and result
+// costs are assumed to be uint32_ts.  Unlike sreal, the class can
+// represent most values that we care about exactly (without rounding).
+// See the comment above the SCALE field for the current set of
+// exactly-representable reciprocals.
+class fractional_cost
+{
+public:
+  // Construct an object equal to INT_VALUE.
+  constexpr fractional_cost (uint32_t int_value = 0)
+    : m_value (uint64_t (int_value) * SCALE) {}
+
+  fractional_cost (uint32_t a, uint32_t b);
+
+  fractional_cost operator+ (const fractional_cost &) const;
+  fractional_cost operator- (const fractional_cost &) const;
+  fractional_cost operator* (uint32_t) const;
+
+  fractional_cost &operator+= (const fractional_cost &);
+  fractional_cost &operator-= (const fractional_cost &);
+  fractional_cost &operator*= (uint32_t);
+
+  bool operator== (const fractional_cost &) const;
+  bool operator!= (const fractional_cost &) const;
+  bool operator< (const fractional_cost &) const;
+  bool operator<= (const fractional_cost &) const;
+  bool operator>= (const fractional_cost &) const;
+  bool operator> (const fractional_cost &) const;
+
+  uint32_t ceil () const;
+
+  static uint32_t scale (uint32_t, fractional_cost, fractional_cost);
+
+  explicit operator bool () const { return m_value != 0; }
+
+  // Convert the value to a double.
+  double as_double () const { return double (m_value) / SCALE; }
+
+private:
+  enum raw { RAW };
+  constexpr fractional_cost (uint64_t value, raw) : m_value (value) {}
+
+  // The least common multiple of [1, 16], multiplied by 16.  This ensures
+  // that 1/N is representable for every possible SVE element count N,
+  // or for any "X per cycle" value N in the range [1, 16].
+  static const uint32_t SCALE = 11531520;
+
+  // The value multiplied by SCALE.
+  uint64_t m_value;
+};
+
+// Construct a representation of A / B, rounding up if (contrary to
+// expectations) we can't represent the value exactly.  For now we
+// treat inexact values as a bug, since all values of B should come
+// from a small set of values that are known at compile time.
+inline fractional_cost::fractional_cost (uint32_t a, uint32_t b)
+  : m_value (CEIL (uint64_t (a) * SCALE, uint64_t (b)))
+{
+  gcc_checking_assert (SCALE % b == 0);
+}
+
+inline fractional_cost
+fractional_cost::operator+ (const fractional_cost &other) const
+{
+  uint64_t sum = m_value + other.m_value;
+  return { sum >= m_value ? sum : ~uint64_t (0), RAW };
+}
+
+inline fractional_cost &
+fractional_cost::operator+= (const fractional_cost &other)
+{
+  *this = *this + other;
+  return *this;
+}
+
+inline fractional_cost
+fractional_cost::operator- (const fractional_cost &other) const
+{
+  uint64_t diff = m_value - other.m_value;
+  return { diff <= m_value ? diff : 0, RAW };
+}
+
+inline fractional_cost &
+fractional_cost::operator-= (const fractional_cost &other)
+{
+  *this = *this - other;
+  return *this;
+}
+
+inline fractional_cost
+fractional_cost::operator* (uint32_t other) const
+{
+  if (other == 0)
+    return 0;
+
+  uint64_t max = ~uint64_t (0);
+  return { m_value <= max / other ? m_value * other : max, RAW };
+}
+
+inline fractional_cost &
+fractional_cost::operator*= (uint32_t other)
+{
+  *this = *this * other;
+  return *this;
+}
+
+inline bool
+fractional_cost::operator== (const fractional_cost &other) const
+{
+  return m_value == other.m_value;
+}
+
+inline bool
+fractional_cost::operator!= (const fractional_cost &other) const
+{
+  return m_value != other.m_value;
+}
+
+inline bool
+fractional_cost::operator< (const fractional_cost &other) const
+{
+  return m_value < other.m_value;
+}
+
+inline bool
+fractional_cost::operator<= (const fractional_cost &other) const
+{
+  return m_value <= other.m_value;
+}
+
+inline bool
+fractional_cost::operator>= (const fractional_cost &other) const
+{
+  return m_value >= other.m_value;
+}
+
+inline bool
+fractional_cost::operator> (const fractional_cost &other) const
+{
+  return m_value > other.m_value;
+}
+
+// Round the value up to the nearest integer and saturate to a uint32_t.
+inline uint32_t
+fractional_cost::ceil () const
+{
+  uint32_t max = ~uint32_t (0);
+  if (m_value <= uint64_t (max - 1) * SCALE)
+    return (m_value + SCALE - 1) / SCALE;
+  return max;
+}
+
+// Round (COST * A) / B up to the nearest integer and saturate to a uint32_t.
+inline uint32_t
+fractional_cost::scale (uint32_t cost, fractional_cost a, fractional_cost b)
+{
+  widest_int result = wi::div_ceil (widest_int (cost) * a.m_value,
+				    b.m_value, SIGNED);
+  if (result < ~uint32_t (0))
+    return result.to_shwi ();
+  return ~uint32_t (0);
+}
+
+inline fractional_cost
+operator+ (uint32_t a, const fractional_cost &b)
+{
+  return b.operator+ (a);
+}
+
+inline fractional_cost
+operator- (uint32_t a, const fractional_cost &b)
+{
+  return fractional_cost (a).operator- (b);
+}
+
+inline fractional_cost
+operator* (uint32_t a, const fractional_cost &b)
+{
+  return b.operator* (a);
+}
+
+inline bool
+operator== (uint32_t a, const fractional_cost &b)
+{
+  return b.operator== (a);
+}
+
+inline bool
+operator!= (uint32_t a, const fractional_cost &b)
+{
+  return b.operator!= (a);
+}
+
+inline bool
+operator< (uint32_t a, const fractional_cost &b)
+{
+  return b.operator> (a);
+}
+
+inline bool
+operator<= (uint32_t a, const fractional_cost &b)
+{
+  return b.operator>= (a);
+}
+
+inline bool
+operator>= (uint32_t a, const fractional_cost &b)
+{
+  return b.operator<= (a);
+}
+
+inline bool
+operator> (uint32_t a, const fractional_cost &b)
+{
+  return b.operator< (a);
+}

