[PATCH] Force rtl templates to be inlined

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] Force rtl templates to be inlined
@ 2014-09-02  7:05 Andi Kleen
  2014-09-02  7:09 ` Andrew Pinski
  2014-09-02 14:59 ` David Malcolm
  0 siblings, 2 replies; 22+ messages in thread
From: Andi Kleen @ 2014-09-02  7:05 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andi Kleen, dmalcolm

From: Andi Kleen <ak@linux.intel.com>

I noticed that with the trunk compiler a range of the new rtl
inlines show up as hot in a profiler during stage1. I think
that happens because stage1 is not using optimization
and does not inline plain "inline".  And these rtl inlines
are very frequently called.

Mark them all with __attribute__((always_inline)) which forces
inlining even with -O0.

Passes bootstrap and testing on x86_64-linux.

Cc: dmalcolm@redhat.com

include/:

2014-09-01  Andi Kleen  <ak@linux.intel.com>

	* ansidecl.h (ALWAYS_INLINE): Add.

gcc/:

2014-09-01  Andi Kleen  <ak@linux.intel.com>

	* rtl.h (is_a_helper): Change inline to ALWAYS_INLINE.
	(rhs_regno): Dito.
	(init_costs_to_max): Dito.
	(init_costs_to_zero): Dito.
	(costs_lt_p): Dito.
	(costs_add_n_insns): Dito.
	(wi::int_traits ::get_precision): Dito.
	(wi::shwi): Dito.
	(wi::min_value): Dito.
	(wi::max_value): Dito.
	(set_rtx_cost): Dito.
	(get_full_set_rtx_cost): Dito.
	(set_src_cost): Dito.
	(get_full_set_src_cost): Dito.
	(get_mem_attrs): Dito.
---
 gcc/rtl.h          | 111 +++++++++++++++++++++++++++--------------------------
 include/ansidecl.h |   6 +++
 2 files changed, 62 insertions(+), 55 deletions(-)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index beeed2f..d711e43 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_RTL_H
 
 #include <utility>
+#include "ansidecl.h"
 #include "statistics.h"
 #include "machmode.h"
 #include "input.h"
@@ -418,7 +419,7 @@ public:
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_expr_list *>::test (rtx rt)
 {
   return rt->code == EXPR_LIST;
@@ -447,7 +448,7 @@ public:
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_insn_list *>::test (rtx rt)
 {
   return rt->code == INSN_LIST;
@@ -474,7 +475,7 @@ public:
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_sequence *>::test (rtx rt)
 {
   return rt->code == SEQUENCE;
@@ -482,7 +483,7 @@ is_a_helper <rtx_sequence *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <const rtx_sequence *>::test (const_rtx rt)
 {
   return rt->code == SEQUENCE;
@@ -778,7 +779,7 @@ struct GTY(()) rtvec_def {
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_insn *>::test (rtx rt)
 {
   return (INSN_P (rt)
@@ -790,7 +791,7 @@ is_a_helper <rtx_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <const rtx_insn *>::test (const_rtx rt)
 {
   return (INSN_P (rt)
@@ -802,7 +803,7 @@ is_a_helper <const rtx_insn *>::test (const_rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_debug_insn *>::test (rtx rt)
 {
   return DEBUG_INSN_P (rt);
@@ -810,7 +811,7 @@ is_a_helper <rtx_debug_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_nonjump_insn *>::test (rtx rt)
 {
   return NONJUMP_INSN_P (rt);
@@ -818,7 +819,7 @@ is_a_helper <rtx_nonjump_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_jump_insn *>::test (rtx rt)
 {
   return JUMP_P (rt);
@@ -826,7 +827,7 @@ is_a_helper <rtx_jump_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_call_insn *>::test (rtx rt)
 {
   return CALL_P (rt);
@@ -834,7 +835,7 @@ is_a_helper <rtx_call_insn *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
 {
   return CALL_P (insn);
@@ -842,7 +843,7 @@ is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_jump_table_data *>::test (rtx rt)
 {
   return JUMP_TABLE_DATA_P (rt);
@@ -850,7 +851,7 @@ is_a_helper <rtx_jump_table_data *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_jump_table_data *>::test (rtx_insn *insn)
 {
   return JUMP_TABLE_DATA_P (insn);
@@ -858,7 +859,7 @@ is_a_helper <rtx_jump_table_data *>::test (rtx_insn *insn)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_barrier *>::test (rtx rt)
 {
   return BARRIER_P (rt);
@@ -866,7 +867,7 @@ is_a_helper <rtx_barrier *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_code_label *>::test (rtx rt)
 {
   return LABEL_P (rt);
@@ -874,7 +875,7 @@ is_a_helper <rtx_code_label *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_code_label *>::test (rtx_insn *insn)
 {
   return LABEL_P (insn);
@@ -882,7 +883,7 @@ is_a_helper <rtx_code_label *>::test (rtx_insn *insn)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_note *>::test (rtx rt)
 {
   return NOTE_P (rt);
@@ -890,7 +891,7 @@ is_a_helper <rtx_note *>::test (rtx rt)
 
 template <>
 template <>
-inline bool
+ALWAYS_INLINE bool
 is_a_helper <rtx_note *>::test (rtx_insn *insn)
 {
   return NOTE_P (insn);
@@ -1257,26 +1258,26 @@ extern void rtl_check_failed_flag (const char *, const_rtx, const char *,
 
 /* Methods of rtx_expr_list.  */
 
-inline rtx_expr_list *rtx_expr_list::next () const
+ALWAYS_INLINE rtx_expr_list *rtx_expr_list::next () const
 {
   rtx tmp = XEXP (this, 1);
   return safe_as_a <rtx_expr_list *> (tmp);
 }
 
-inline rtx rtx_expr_list::element () const
+ALWAYS_INLINE rtx rtx_expr_list::element () const
 {
   return XEXP (this, 0);
 }
 
 /* Methods of rtx_insn_list.  */
 
-inline rtx_insn_list *rtx_insn_list::next () const
+ALWAYS_INLINE rtx_insn_list *rtx_insn_list::next () const
 {
   rtx tmp = XEXP (this, 1);
   return safe_as_a <rtx_insn_list *> (tmp);
 }
 
-inline rtx_insn *rtx_insn_list::insn () const
+ALWAYS_INLINE rtx_insn *rtx_insn_list::insn () const
 {
   rtx tmp = XEXP (this, 0);
   return safe_as_a <rtx_insn *> (tmp);
@@ -1284,17 +1285,17 @@ inline rtx_insn *rtx_insn_list::insn () const
 
 /* Methods of rtx_sequence.  */
 
-inline int rtx_sequence::len () const
+ALWAYS_INLINE int rtx_sequence::len () const
 {
   return XVECLEN (this, 0);
 }
 
-inline rtx rtx_sequence::element (int index) const
+ALWAYS_INLINE rtx rtx_sequence::element (int index) const
 {
   return XVECEXP (this, 0, index);
 }
 
-inline rtx_insn *rtx_sequence::insn (int index) const
+ALWAYS_INLINE rtx_insn *rtx_sequence::insn (int index) const
 {
   return as_a <rtx_insn *> (XVECEXP (this, 0, index));
 }
@@ -1303,12 +1304,12 @@ inline rtx_insn *rtx_sequence::insn (int index) const
 
 /* Holds a unique number for each insn.
    These are not necessarily sequentially increasing.  */
-inline int INSN_UID (const_rtx insn)
+ALWAYS_INLINE int INSN_UID (const_rtx insn)
 {
   return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID",
 				    (insn))->u2.insn_uid;
 }
-inline int& INSN_UID (rtx insn)
+ALWAYS_INLINE int& INSN_UID (rtx insn)
 {
   return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID",
 				    (insn))->u2.insn_uid;
@@ -1321,60 +1322,60 @@ inline int& INSN_UID (rtx insn)
    and an lvalue form:
      SET_NEXT_INSN/SET_PREV_INSN.  */
 
-inline rtx_insn *PREV_INSN (const rtx_insn *insn)
+ALWAYS_INLINE rtx_insn *PREV_INSN (const rtx_insn *insn)
 {
   rtx prev = XEXP (insn, 0);
   return safe_as_a <rtx_insn *> (prev);
 }
 
-inline rtx& SET_PREV_INSN (rtx_insn *insn)
+ALWAYS_INLINE rtx& SET_PREV_INSN (rtx_insn *insn)
 {
   return XEXP (insn, 0);
 }
 
-inline rtx_insn *NEXT_INSN (const rtx_insn *insn)
+ALWAYS_INLINE rtx_insn *NEXT_INSN (const rtx_insn *insn)
 {
   rtx next = XEXP (insn, 1);
   return safe_as_a <rtx_insn *> (next);
 }
 
-inline rtx& SET_NEXT_INSN (rtx_insn *insn)
+ALWAYS_INLINE rtx& SET_NEXT_INSN (rtx_insn *insn)
 {
   return XEXP (insn, 1);
 }
 
-inline basic_block BLOCK_FOR_INSN (const_rtx insn)
+ALWAYS_INLINE basic_block BLOCK_FOR_INSN (const_rtx insn)
 {
   return XBBDEF (insn, 2);
 }
 
-inline basic_block& BLOCK_FOR_INSN (rtx insn)
+ALWAYS_INLINE basic_block& BLOCK_FOR_INSN (rtx insn)
 {
   return XBBDEF (insn, 2);
 }
 
 /* The body of an insn.  */
-inline rtx PATTERN (const_rtx insn)
+ALWAYS_INLINE rtx PATTERN (const_rtx insn)
 {
   return XEXP (insn, 3);
 }
 
-inline rtx& PATTERN (rtx insn)
+ALWAYS_INLINE rtx& PATTERN (rtx insn)
 {
   return XEXP (insn, 3);
 }
 
-inline unsigned int INSN_LOCATION (const_rtx insn)
+ALWAYS_INLINE unsigned int INSN_LOCATION (const_rtx insn)
 {
   return XUINT (insn, 4);
 }
 
-inline unsigned int& INSN_LOCATION (rtx insn)
+ALWAYS_INLINE unsigned int& INSN_LOCATION (rtx insn)
 {
   return XUINT (insn, 4);
 }
 
-inline bool INSN_HAS_LOCATION (const rtx_insn *insn)
+ALWAYS_INLINE bool INSN_HAS_LOCATION (const rtx_insn *insn)
 {
   return LOCATION_LOCUS (INSN_LOCATION (insn)) != UNKNOWN_LOCATION;
 }
@@ -1387,7 +1388,7 @@ inline bool INSN_HAS_LOCATION (const rtx_insn *insn)
    -1 means this instruction has not been recognized yet.  */
 #define INSN_CODE(INSN) XINT (INSN, 5)
 
-inline rtvec rtx_jump_table_data::get_labels () const
+ALWAYS_INLINE rtvec rtx_jump_table_data::get_labels () const
 {
   rtx pat = PATTERN (this);
   if (GET_CODE (pat) == ADDR_VEC)
@@ -1658,7 +1659,7 @@ enum label_kind
    be decremented and possibly the label can be deleted.  */
 #define JUMP_LABEL(INSN)   XCEXP (INSN, 7, JUMP_INSN)
 
-inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
+ALWAYS_INLINE rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
 {
   return safe_as_a <rtx_insn *> (JUMP_LABEL (insn));
 }
@@ -1682,7 +1683,7 @@ inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
   (RTL_FLAG_CHECK1 ("ORIGINAL_REGNO", (RTX), REG)->u2.original_regno)
 
 /* Force the REGNO macro to only be used on the lhs.  */
-static inline unsigned int
+static ALWAYS_INLINE unsigned int
 rhs_regno (const_rtx x)
 {
   return XCUINT (x, 0, REG);
@@ -1774,7 +1775,7 @@ struct full_rtx_costs
 };
 
 /* Initialize a full_rtx_costs structure C to the maximum cost.  */
-static inline void
+static ALWAYS_INLINE void
 init_costs_to_max (struct full_rtx_costs *c)
 {
   c->speed = MAX_COST;
@@ -1782,7 +1783,7 @@ init_costs_to_max (struct full_rtx_costs *c)
 }
 
 /* Initialize a full_rtx_costs structure C to zero cost.  */
-static inline void
+static ALWAYS_INLINE void
 init_costs_to_zero (struct full_rtx_costs *c)
 {
   c->speed = 0;
@@ -1791,7 +1792,7 @@ init_costs_to_zero (struct full_rtx_costs *c)
 
 /* Compare two full_rtx_costs structures A and B, returning true
    if A < B when optimizing for speed.  */
-static inline bool
+static ALWAYS_INLINE bool
 costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b,
 	    bool speed)
 {
@@ -1805,7 +1806,7 @@ costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b,
 
 /* Increase both members of the full_rtx_costs structure C by the
    cost of N insns.  */
-static inline void
+static ALWAYS_INLINE void
 costs_add_n_insns (struct full_rtx_costs *c, int n)
 {
   c->speed += COSTS_N_INSNS (n);
@@ -1904,13 +1905,13 @@ namespace wi
   };
 }
 
-inline unsigned int
+ALWAYS_INLINE unsigned int
 wi::int_traits <rtx_mode_t>::get_precision (const rtx_mode_t &x)
 {
   return GET_MODE_PRECISION (x.second);
 }
 
-inline wi::storage_ref
+ALWAYS_INLINE wi::storage_ref
 wi::int_traits <rtx_mode_t>::decompose (HOST_WIDE_INT *,
 					unsigned int precision,
 					const rtx_mode_t &x)
@@ -1949,7 +1950,7 @@ namespace wi
   wide_int max_value (enum machine_mode, signop);
 }
 
-inline wi::hwi_with_prec
+ALWAYS_INLINE wi::hwi_with_prec
 wi::shwi (HOST_WIDE_INT val, enum machine_mode mode)
 {
   return shwi (val, GET_MODE_PRECISION (mode));
@@ -1957,7 +1958,7 @@ wi::shwi (HOST_WIDE_INT val, enum machine_mode mode)
 
 /* Produce the smallest number that is represented in MODE.  The precision
    is taken from MODE and the sign from SGN.  */
-inline wide_int
+ALWAYS_INLINE wide_int
 wi::min_value (enum machine_mode mode, signop sgn)
 {
   return min_value (GET_MODE_PRECISION (mode), sgn);
@@ -1965,7 +1966,7 @@ wi::min_value (enum machine_mode mode, signop sgn)
 
 /* Produce the largest number that is represented in MODE.  The precision
    is taken from MODE and the sign from SGN.  */
-inline wide_int
+ALWAYS_INLINE wide_int
 wi::max_value (enum machine_mode mode, signop sgn)
 {
   return max_value (GET_MODE_PRECISION (mode), sgn);
@@ -2007,7 +2008,7 @@ extern enum rtx_code get_index_code (const struct address_info *);
 /* Return the cost of SET X.  SPEED_P is true if optimizing for speed
    rather than size.  */
 
-static inline int
+static ALWAYS_INLINE int
 set_rtx_cost (rtx x, bool speed_p)
 {
   return rtx_cost (x, INSN, 4, speed_p);
@@ -2015,7 +2016,7 @@ set_rtx_cost (rtx x, bool speed_p)
 
 /* Like set_rtx_cost, but return both the speed and size costs in C.  */
 
-static inline void
+static ALWAYS_INLINE void
 get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c)
 {
   get_full_rtx_cost (x, INSN, 4, c);
@@ -2025,7 +2026,7 @@ get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c)
    of a register move.  SPEED_P is true if optimizing for speed rather
    than size.  */
 
-static inline int
+static ALWAYS_INLINE int
 set_src_cost (rtx x, bool speed_p)
 {
   return rtx_cost (x, SET, 1, speed_p);
@@ -2033,7 +2034,7 @@ set_src_cost (rtx x, bool speed_p)
 
 /* Like set_src_cost, but return both the speed and size costs in C.  */
 
-static inline void
+static ALWAYS_INLINE void
 get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
 {
   get_full_rtx_cost (x, SET, 1, c);
@@ -3055,7 +3056,7 @@ extern struct target_rtl *this_target_rtl;
 
 #ifndef GENERATOR_FILE
 /* Return the attributes of a MEM rtx.  */
-static inline struct mem_attrs *
+static ALWAYS_INLINE struct mem_attrs *
 get_mem_attrs (const_rtx x)
 {
   struct mem_attrs *attrs;
diff --git a/include/ansidecl.h b/include/ansidecl.h
index 0fb23bb..9132ee0 100644
--- a/include/ansidecl.h
+++ b/include/ansidecl.h
@@ -306,6 +306,12 @@ So instead we use the macro below and test it against specific values.  */
 #define ENUM_BITFIELD(TYPE) unsigned int
 #endif
 
+#ifdef __GNUC__
+#define ALWAYS_INLINE __attribute__ ((always_inline)) inline
+#else
+#define ALWAYS_INLINE inline
+#endif
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.1.0

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02  7:05 [PATCH] Force rtl templates to be inlined Andi Kleen
@ 2014-09-02  7:09 ` Andrew Pinski
  2014-09-02  7:20   ` Andi Kleen
  2014-09-02 14:59 ` David Malcolm
  1 sibling, 1 reply; 22+ messages in thread
From: Andrew Pinski @ 2014-09-02  7:09 UTC (permalink / raw)
  To: Andi Kleen; +Cc: GCC Patches, Andi Kleen, David Malcolm

On Tue, Sep 2, 2014 at 12:03 AM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> I noticed that with the trunk compiler a range of the new rtl
> inlines show up as hot in a profiler during stage1. I think
> that happens because stage1 is not using optimization
> and does not inline plain "inline".  And these rtl inlines
> are very frequently called.
>
> Mark them all with __attribute__((always_inline)) which forces
> inlining even with -O0.


I think this is wrong and should not be committed.  stage1 is designed
to be without optimization and there have been bugs in the past in the
area of always_inline too.

Thanks,
Andrew Pinski

>
> Passes bootstrap and testing on x86_64-linux.
>
> Cc: dmalcolm@redhat.com
>
> include/:
>
> 2014-09-01  Andi Kleen  <ak@linux.intel.com>
>
>         * ansidecl.h (ALWAYS_INLINE): Add.
>
> gcc/:
>
> 2014-09-01  Andi Kleen  <ak@linux.intel.com>
>
>         * rtl.h (is_a_helper): Change inline to ALWAYS_INLINE.
>         (rhs_regno): Dito.
>         (init_costs_to_max): Dito.
>         (init_costs_to_zero): Dito.
>         (costs_lt_p): Dito.
>         (costs_add_n_insns): Dito.
>         (wi::int_traits ::get_precision): Dito.
>         (wi::shwi): Dito.
>         (wi::min_value): Dito.
>         (wi::max_value): Dito.
>         (set_rtx_cost): Dito.
>         (get_full_set_rtx_cost): Dito.
>         (set_src_cost): Dito.
>         (get_full_set_src_cost): Dito.
>         (get_mem_attrs): Dito.
> ---
>  gcc/rtl.h          | 111 +++++++++++++++++++++++++++--------------------------
>  include/ansidecl.h |   6 +++
>  2 files changed, 62 insertions(+), 55 deletions(-)
>
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index beeed2f..d711e43 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
>  #define GCC_RTL_H
>
>  #include <utility>
> +#include "ansidecl.h"
>  #include "statistics.h"
>  #include "machmode.h"
>  #include "input.h"
> @@ -418,7 +419,7 @@ public:
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_expr_list *>::test (rtx rt)
>  {
>    return rt->code == EXPR_LIST;
> @@ -447,7 +448,7 @@ public:
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_insn_list *>::test (rtx rt)
>  {
>    return rt->code == INSN_LIST;
> @@ -474,7 +475,7 @@ public:
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_sequence *>::test (rtx rt)
>  {
>    return rt->code == SEQUENCE;
> @@ -482,7 +483,7 @@ is_a_helper <rtx_sequence *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <const rtx_sequence *>::test (const_rtx rt)
>  {
>    return rt->code == SEQUENCE;
> @@ -778,7 +779,7 @@ struct GTY(()) rtvec_def {
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_insn *>::test (rtx rt)
>  {
>    return (INSN_P (rt)
> @@ -790,7 +791,7 @@ is_a_helper <rtx_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <const rtx_insn *>::test (const_rtx rt)
>  {
>    return (INSN_P (rt)
> @@ -802,7 +803,7 @@ is_a_helper <const rtx_insn *>::test (const_rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_debug_insn *>::test (rtx rt)
>  {
>    return DEBUG_INSN_P (rt);
> @@ -810,7 +811,7 @@ is_a_helper <rtx_debug_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_nonjump_insn *>::test (rtx rt)
>  {
>    return NONJUMP_INSN_P (rt);
> @@ -818,7 +819,7 @@ is_a_helper <rtx_nonjump_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_jump_insn *>::test (rtx rt)
>  {
>    return JUMP_P (rt);
> @@ -826,7 +827,7 @@ is_a_helper <rtx_jump_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_call_insn *>::test (rtx rt)
>  {
>    return CALL_P (rt);
> @@ -834,7 +835,7 @@ is_a_helper <rtx_call_insn *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
>  {
>    return CALL_P (insn);
> @@ -842,7 +843,7 @@ is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_jump_table_data *>::test (rtx rt)
>  {
>    return JUMP_TABLE_DATA_P (rt);
> @@ -850,7 +851,7 @@ is_a_helper <rtx_jump_table_data *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_jump_table_data *>::test (rtx_insn *insn)
>  {
>    return JUMP_TABLE_DATA_P (insn);
> @@ -858,7 +859,7 @@ is_a_helper <rtx_jump_table_data *>::test (rtx_insn *insn)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_barrier *>::test (rtx rt)
>  {
>    return BARRIER_P (rt);
> @@ -866,7 +867,7 @@ is_a_helper <rtx_barrier *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_code_label *>::test (rtx rt)
>  {
>    return LABEL_P (rt);
> @@ -874,7 +875,7 @@ is_a_helper <rtx_code_label *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_code_label *>::test (rtx_insn *insn)
>  {
>    return LABEL_P (insn);
> @@ -882,7 +883,7 @@ is_a_helper <rtx_code_label *>::test (rtx_insn *insn)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_note *>::test (rtx rt)
>  {
>    return NOTE_P (rt);
> @@ -890,7 +891,7 @@ is_a_helper <rtx_note *>::test (rtx rt)
>
>  template <>
>  template <>
> -inline bool
> +ALWAYS_INLINE bool
>  is_a_helper <rtx_note *>::test (rtx_insn *insn)
>  {
>    return NOTE_P (insn);
> @@ -1257,26 +1258,26 @@ extern void rtl_check_failed_flag (const char *, const_rtx, const char *,
>
>  /* Methods of rtx_expr_list.  */
>
> -inline rtx_expr_list *rtx_expr_list::next () const
> +ALWAYS_INLINE rtx_expr_list *rtx_expr_list::next () const
>  {
>    rtx tmp = XEXP (this, 1);
>    return safe_as_a <rtx_expr_list *> (tmp);
>  }
>
> -inline rtx rtx_expr_list::element () const
> +ALWAYS_INLINE rtx rtx_expr_list::element () const
>  {
>    return XEXP (this, 0);
>  }
>
>  /* Methods of rtx_insn_list.  */
>
> -inline rtx_insn_list *rtx_insn_list::next () const
> +ALWAYS_INLINE rtx_insn_list *rtx_insn_list::next () const
>  {
>    rtx tmp = XEXP (this, 1);
>    return safe_as_a <rtx_insn_list *> (tmp);
>  }
>
> -inline rtx_insn *rtx_insn_list::insn () const
> +ALWAYS_INLINE rtx_insn *rtx_insn_list::insn () const
>  {
>    rtx tmp = XEXP (this, 0);
>    return safe_as_a <rtx_insn *> (tmp);
> @@ -1284,17 +1285,17 @@ inline rtx_insn *rtx_insn_list::insn () const
>
>  /* Methods of rtx_sequence.  */
>
> -inline int rtx_sequence::len () const
> +ALWAYS_INLINE int rtx_sequence::len () const
>  {
>    return XVECLEN (this, 0);
>  }
>
> -inline rtx rtx_sequence::element (int index) const
> +ALWAYS_INLINE rtx rtx_sequence::element (int index) const
>  {
>    return XVECEXP (this, 0, index);
>  }
>
> -inline rtx_insn *rtx_sequence::insn (int index) const
> +ALWAYS_INLINE rtx_insn *rtx_sequence::insn (int index) const
>  {
>    return as_a <rtx_insn *> (XVECEXP (this, 0, index));
>  }
> @@ -1303,12 +1304,12 @@ inline rtx_insn *rtx_sequence::insn (int index) const
>
>  /* Holds a unique number for each insn.
>     These are not necessarily sequentially increasing.  */
> -inline int INSN_UID (const_rtx insn)
> +ALWAYS_INLINE int INSN_UID (const_rtx insn)
>  {
>    return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID",
>                                     (insn))->u2.insn_uid;
>  }
> -inline int& INSN_UID (rtx insn)
> +ALWAYS_INLINE int& INSN_UID (rtx insn)
>  {
>    return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID",
>                                     (insn))->u2.insn_uid;
> @@ -1321,60 +1322,60 @@ inline int& INSN_UID (rtx insn)
>     and an lvalue form:
>       SET_NEXT_INSN/SET_PREV_INSN.  */
>
> -inline rtx_insn *PREV_INSN (const rtx_insn *insn)
> +ALWAYS_INLINE rtx_insn *PREV_INSN (const rtx_insn *insn)
>  {
>    rtx prev = XEXP (insn, 0);
>    return safe_as_a <rtx_insn *> (prev);
>  }
>
> -inline rtx& SET_PREV_INSN (rtx_insn *insn)
> +ALWAYS_INLINE rtx& SET_PREV_INSN (rtx_insn *insn)
>  {
>    return XEXP (insn, 0);
>  }
>
> -inline rtx_insn *NEXT_INSN (const rtx_insn *insn)
> +ALWAYS_INLINE rtx_insn *NEXT_INSN (const rtx_insn *insn)
>  {
>    rtx next = XEXP (insn, 1);
>    return safe_as_a <rtx_insn *> (next);
>  }
>
> -inline rtx& SET_NEXT_INSN (rtx_insn *insn)
> +ALWAYS_INLINE rtx& SET_NEXT_INSN (rtx_insn *insn)
>  {
>    return XEXP (insn, 1);
>  }
>
> -inline basic_block BLOCK_FOR_INSN (const_rtx insn)
> +ALWAYS_INLINE basic_block BLOCK_FOR_INSN (const_rtx insn)
>  {
>    return XBBDEF (insn, 2);
>  }
>
> -inline basic_block& BLOCK_FOR_INSN (rtx insn)
> +ALWAYS_INLINE basic_block& BLOCK_FOR_INSN (rtx insn)
>  {
>    return XBBDEF (insn, 2);
>  }
>
>  /* The body of an insn.  */
> -inline rtx PATTERN (const_rtx insn)
> +ALWAYS_INLINE rtx PATTERN (const_rtx insn)
>  {
>    return XEXP (insn, 3);
>  }
>
> -inline rtx& PATTERN (rtx insn)
> +ALWAYS_INLINE rtx& PATTERN (rtx insn)
>  {
>    return XEXP (insn, 3);
>  }
>
> -inline unsigned int INSN_LOCATION (const_rtx insn)
> +ALWAYS_INLINE unsigned int INSN_LOCATION (const_rtx insn)
>  {
>    return XUINT (insn, 4);
>  }
>
> -inline unsigned int& INSN_LOCATION (rtx insn)
> +ALWAYS_INLINE unsigned int& INSN_LOCATION (rtx insn)
>  {
>    return XUINT (insn, 4);
>  }
>
> -inline bool INSN_HAS_LOCATION (const rtx_insn *insn)
> +ALWAYS_INLINE bool INSN_HAS_LOCATION (const rtx_insn *insn)
>  {
>    return LOCATION_LOCUS (INSN_LOCATION (insn)) != UNKNOWN_LOCATION;
>  }
> @@ -1387,7 +1388,7 @@ inline bool INSN_HAS_LOCATION (const rtx_insn *insn)
>     -1 means this instruction has not been recognized yet.  */
>  #define INSN_CODE(INSN) XINT (INSN, 5)
>
> -inline rtvec rtx_jump_table_data::get_labels () const
> +ALWAYS_INLINE rtvec rtx_jump_table_data::get_labels () const
>  {
>    rtx pat = PATTERN (this);
>    if (GET_CODE (pat) == ADDR_VEC)
> @@ -1658,7 +1659,7 @@ enum label_kind
>     be decremented and possibly the label can be deleted.  */
>  #define JUMP_LABEL(INSN)   XCEXP (INSN, 7, JUMP_INSN)
>
> -inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
> +ALWAYS_INLINE rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
>  {
>    return safe_as_a <rtx_insn *> (JUMP_LABEL (insn));
>  }
> @@ -1682,7 +1683,7 @@ inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn)
>    (RTL_FLAG_CHECK1 ("ORIGINAL_REGNO", (RTX), REG)->u2.original_regno)
>
>  /* Force the REGNO macro to only be used on the lhs.  */
> -static inline unsigned int
> +static ALWAYS_INLINE unsigned int
>  rhs_regno (const_rtx x)
>  {
>    return XCUINT (x, 0, REG);
> @@ -1774,7 +1775,7 @@ struct full_rtx_costs
>  };
>
>  /* Initialize a full_rtx_costs structure C to the maximum cost.  */
> -static inline void
> +static ALWAYS_INLINE void
>  init_costs_to_max (struct full_rtx_costs *c)
>  {
>    c->speed = MAX_COST;
> @@ -1782,7 +1783,7 @@ init_costs_to_max (struct full_rtx_costs *c)
>  }
>
>  /* Initialize a full_rtx_costs structure C to zero cost.  */
> -static inline void
> +static ALWAYS_INLINE void
>  init_costs_to_zero (struct full_rtx_costs *c)
>  {
>    c->speed = 0;
> @@ -1791,7 +1792,7 @@ init_costs_to_zero (struct full_rtx_costs *c)
>
>  /* Compare two full_rtx_costs structures A and B, returning true
>     if A < B when optimizing for speed.  */
> -static inline bool
> +static ALWAYS_INLINE bool
>  costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b,
>             bool speed)
>  {
> @@ -1805,7 +1806,7 @@ costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b,
>
>  /* Increase both members of the full_rtx_costs structure C by the
>     cost of N insns.  */
> -static inline void
> +static ALWAYS_INLINE void
>  costs_add_n_insns (struct full_rtx_costs *c, int n)
>  {
>    c->speed += COSTS_N_INSNS (n);
> @@ -1904,13 +1905,13 @@ namespace wi
>    };
>  }
>
> -inline unsigned int
> +ALWAYS_INLINE unsigned int
>  wi::int_traits <rtx_mode_t>::get_precision (const rtx_mode_t &x)
>  {
>    return GET_MODE_PRECISION (x.second);
>  }
>
> -inline wi::storage_ref
> +ALWAYS_INLINE wi::storage_ref
>  wi::int_traits <rtx_mode_t>::decompose (HOST_WIDE_INT *,
>                                         unsigned int precision,
>                                         const rtx_mode_t &x)
> @@ -1949,7 +1950,7 @@ namespace wi
>    wide_int max_value (enum machine_mode, signop);
>  }
>
> -inline wi::hwi_with_prec
> +ALWAYS_INLINE wi::hwi_with_prec
>  wi::shwi (HOST_WIDE_INT val, enum machine_mode mode)
>  {
>    return shwi (val, GET_MODE_PRECISION (mode));
> @@ -1957,7 +1958,7 @@ wi::shwi (HOST_WIDE_INT val, enum machine_mode mode)
>
>  /* Produce the smallest number that is represented in MODE.  The precision
>     is taken from MODE and the sign from SGN.  */
> -inline wide_int
> +ALWAYS_INLINE wide_int
>  wi::min_value (enum machine_mode mode, signop sgn)
>  {
>    return min_value (GET_MODE_PRECISION (mode), sgn);
> @@ -1965,7 +1966,7 @@ wi::min_value (enum machine_mode mode, signop sgn)
>
>  /* Produce the largest number that is represented in MODE.  The precision
>     is taken from MODE and the sign from SGN.  */
> -inline wide_int
> +ALWAYS_INLINE wide_int
>  wi::max_value (enum machine_mode mode, signop sgn)
>  {
>    return max_value (GET_MODE_PRECISION (mode), sgn);
> @@ -2007,7 +2008,7 @@ extern enum rtx_code get_index_code (const struct address_info *);
>  /* Return the cost of SET X.  SPEED_P is true if optimizing for speed
>     rather than size.  */
>
> -static inline int
> +static ALWAYS_INLINE int
>  set_rtx_cost (rtx x, bool speed_p)
>  {
>    return rtx_cost (x, INSN, 4, speed_p);
> @@ -2015,7 +2016,7 @@ set_rtx_cost (rtx x, bool speed_p)
>
>  /* Like set_rtx_cost, but return both the speed and size costs in C.  */
>
> -static inline void
> +static ALWAYS_INLINE void
>  get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c)
>  {
>    get_full_rtx_cost (x, INSN, 4, c);
> @@ -2025,7 +2026,7 @@ get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c)
>     of a register move.  SPEED_P is true if optimizing for speed rather
>     than size.  */
>
> -static inline int
> +static ALWAYS_INLINE int
>  set_src_cost (rtx x, bool speed_p)
>  {
>    return rtx_cost (x, SET, 1, speed_p);
> @@ -2033,7 +2034,7 @@ set_src_cost (rtx x, bool speed_p)
>
>  /* Like set_src_cost, but return both the speed and size costs in C.  */
>
> -static inline void
> +static ALWAYS_INLINE void
>  get_full_set_src_cost (rtx x, struct full_rtx_costs *c)
>  {
>    get_full_rtx_cost (x, SET, 1, c);
> @@ -3055,7 +3056,7 @@ extern struct target_rtl *this_target_rtl;
>
>  #ifndef GENERATOR_FILE
>  /* Return the attributes of a MEM rtx.  */
> -static inline struct mem_attrs *
> +static ALWAYS_INLINE struct mem_attrs *
>  get_mem_attrs (const_rtx x)
>  {
>    struct mem_attrs *attrs;
> diff --git a/include/ansidecl.h b/include/ansidecl.h
> index 0fb23bb..9132ee0 100644
> --- a/include/ansidecl.h
> +++ b/include/ansidecl.h
> @@ -306,6 +306,12 @@ So instead we use the macro below and test it against specific values.  */
>  #define ENUM_BITFIELD(TYPE) unsigned int
>  #endif
>
> +#ifdef __GNUC__
> +#define ALWAYS_INLINE __attribute__ ((always_inline)) inline
> +#else
> +#define ALWAYS_INLINE inline
> +#endif
> +
>  #ifdef __cplusplus
>  }
>  #endif
> --
> 2.1.0
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02  7:09 ` Andrew Pinski
@ 2014-09-02  7:20   ` Andi Kleen
  2014-09-02  7:22     ` Andrew Pinski
  0 siblings, 1 reply; 22+ messages in thread
From: Andi Kleen @ 2014-09-02  7:20 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: Andi Kleen, GCC Patches, Andi Kleen, David Malcolm


> there have been bugs in the past in the area of always_inline too.

You're arguing for my patch. It would find those bugs.

-Andi

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02  7:20   ` Andi Kleen
@ 2014-09-02  7:22     ` Andrew Pinski
  2014-09-02  8:37       ` Steven Bosscher
  0 siblings, 1 reply; 22+ messages in thread
From: Andrew Pinski @ 2014-09-02  7:22 UTC (permalink / raw)
  To: Andi Kleen; +Cc: GCC Patches, Andi Kleen, David Malcolm

On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen <andi@firstfloor.org> wrote:
>
>> there have been bugs in the past in the area of always_inline too.
>
> You're arguing for my patch. It would find those bugs.


No I am arguing against it since the older versions of GCC we cannot change.

Thanks,
Andrew

>
> -Andi

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02  7:22     ` Andrew Pinski
@ 2014-09-02  8:37       ` Steven Bosscher
  2014-09-02  8:42         ` pinskia
  2014-09-02  8:43         ` Richard Biener
  0 siblings, 2 replies; 22+ messages in thread
From: Steven Bosscher @ 2014-09-02  8:37 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: Andi Kleen, GCC Patches, Andi Kleen, David Malcolm

On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
> On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:
>>
>>> there have been bugs in the past in the area of always_inline too.
>>
>> You're arguing for my patch. It would find those bugs.
>
>
> No I am arguing against it since the older versions of GCC we cannot change.

Should such bugs turn up, we can account for them in ansidecl.h.

I think Andi's patch should go in.

Ciao!
Steven

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02  8:37       ` Steven Bosscher
@ 2014-09-02  8:42         ` pinskia
  2014-09-02  8:43         ` Richard Biener
  1 sibling, 0 replies; 22+ messages in thread
From: pinskia @ 2014-09-02  8:42 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: Andi Kleen, GCC Patches, Andi Kleen, David Malcolm



> On Sep 2, 2014, at 1:36 AM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> 
>> On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
>>> On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:
>>> 
>>>> there have been bugs in the past in the area of always_inline too.
>>> 
>>> You're arguing for my patch. It would find those bugs.
>> 
>> 
>> No I am arguing against it since the older versions of GCC we cannot change.
> 
> Should such bugs turn up, we can account for them in ansidecl.h.
> 
> I think Andi's patch should go in.

I does hurt debug ability with older compilers too. So if we need to figure out why stage is being miscompiled it is harder to figure how to work around it.  

I think stage should really be -O0 even with respect of inline.  I think we should never force inline inside gcc even at -O0 as it is just a hack (we know it as we added the attribute in the first place). 

Thanks,
Andrew

> 
> Ciao!
> Steven

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02  8:37       ` Steven Bosscher
  2014-09-02  8:42         ` pinskia
@ 2014-09-02  8:43         ` Richard Biener
  2014-09-02 16:52           ` Andi Kleen
  1 sibling, 1 reply; 22+ messages in thread
From: Richard Biener @ 2014-09-02  8:43 UTC (permalink / raw)
  To: Steven Bosscher
  Cc: Andrew Pinski, Andi Kleen, GCC Patches, Andi Kleen, David Malcolm

On Tue, Sep 2, 2014 at 10:36 AM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
>> On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:
>>>
>>>> there have been bugs in the past in the area of always_inline too.
>>>
>>> You're arguing for my patch. It would find those bugs.
>>
>>
>> No I am arguing against it since the older versions of GCC we cannot change.
>
> Should such bugs turn up, we can account for them in ansidecl.h.
>
> I think Andi's patch should go in.

I disagree.  always-inline isn't an optimization attribute but a correctness
one.

Instead we should not build stage1 with -O0 if we detect a reasonably
recent GCC host compiler (say one that is still maintained).  Or
we simply should make -finline work at -O0 (I suppose it might already
work?) and use it.

Richard.

> Ciao!
> Steven

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02  7:05 [PATCH] Force rtl templates to be inlined Andi Kleen
  2014-09-02  7:09 ` Andrew Pinski
@ 2014-09-02 14:59 ` David Malcolm
  2014-09-02 17:50   ` Andi Kleen
  1 sibling, 1 reply; 22+ messages in thread
From: David Malcolm @ 2014-09-02 14:59 UTC (permalink / raw)
  To: Andi Kleen; +Cc: gcc-patches, Andi Kleen

On Tue, 2014-09-02 at 00:03 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> I noticed that with the trunk compiler a range of the new rtl
> inlines show up as hot in a profiler during stage1. I think
> that happens because stage1 is not using optimization
> and does not inline plain "inline".  And these rtl inlines
> are very frequently called.

Sorry about that.

FWIW I'm working on some followup patches for the rtx-classes work that
ought to eliminate some of the is_a_helper<> calls; I hope to post them
in the next few days. [1]

I suspect the bulk of them currently are coming from the safe_as_a
<rtx_insn *> calls within NEXT_INSN and PREV_INSN; do you happen to have
information handy on that?

Dave

[1] (I have to take the rest of today off for a family matter).

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02  8:43         ` Richard Biener
@ 2014-09-02 16:52           ` Andi Kleen
  2014-09-03  9:50             ` Richard Biener
  2014-09-03  9:52             ` Richard Biener
  0 siblings, 2 replies; 22+ messages in thread
From: Andi Kleen @ 2014-09-02 16:52 UTC (permalink / raw)
  To: Richard Biener
  Cc: Steven Bosscher, Andrew Pinski, Andi Kleen, GCC Patches, David Malcolm

> Or we simply should make -finline work at -O0 (I suppose it might already
> work?) and use it.

Yes that's probably better. There are more hot inlines in the stage 1 profile
(like wi::storage_ref or vec::length)
I suspect with the ongoing C++'ification that will get worse.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02 14:59 ` David Malcolm
@ 2014-09-02 17:50   ` Andi Kleen
  2014-09-04 20:07     ` [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN (was Re: [PATCH] Force rtl templates to be inlined) David Malcolm
  0 siblings, 1 reply; 22+ messages in thread
From: Andi Kleen @ 2014-09-02 17:50 UTC (permalink / raw)
  To: David Malcolm; +Cc: Andi Kleen, gcc-patches, Andi Kleen

> I suspect the bulk of them currently are coming from the safe_as_a
> <rtx_insn *> calls within NEXT_INSN and PREV_INSN; do you happen to have
> information handy on that?

Yes that's right:

-   1.03%  lto1                    [.] bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                     â–’
   - bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                                                       â–’
      - 92.20% bool is_a<rtx_insn*, rtx_def>(rtx_def*)                                                                                          â–’
         - 98.53% rtx_insn* safe_as_a<rtx_insn*, rtx_def>(rtx_def*)                                                                             â–’
            - 73.28% NEXT_INSN(rtx_insn const*)                                                                                                 â–’

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02 16:52           ` Andi Kleen
@ 2014-09-03  9:50             ` Richard Biener
  2014-09-04  3:58               ` Andi Kleen
  2014-09-03  9:52             ` Richard Biener
  1 sibling, 1 reply; 22+ messages in thread
From: Richard Biener @ 2014-09-03  9:50 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Steven Bosscher, Andrew Pinski, Andi Kleen, GCC Patches,
	David Malcolm, Jan Hubicka

[-- Attachment #1: Type: text/plain, Size: 1154 bytes --]

On Tue, Sep 2, 2014 at 6:52 PM, Andi Kleen <ak@linux.intel.com> wrote:
>> Or we simply should make -finline work at -O0 (I suppose it might already
>> work?) and use it.
>
> Yes that's probably better. There are more hot inlines in the stage 1 profile
> (like wi::storage_ref or vec::length)
> I suspect with the ongoing C++'ification that will get worse.

Like with this (untested apart from on a small testcase).  Of course it
usually won't help bootstrap because your host compiler doesn't support
-O0 -finline yet.  Also it might not help that much because while it certainly
removes call overhead it will still not optimize anything else.  Also
inline analysis correctly assumes that no stmts go away so you
only have the call overhead as room to allow inlining (so with checking
enabled the is_a/as_a stuff is probably too large - didn't check).

It may also negatively affect debugging (no var-tracking at -O0).

Anyway, removing !optimize checks in favor of flag_no_inline checks
and initializing that properly is a cleanup as well.

Now to see how to best test this ...

Richard.

> -Andi
>
> --
> ak@linux.intel.com -- Speaking for myself only

[-- Attachment #2: make-inlining-work-at-O0 --]
[-- Type: application/octet-stream, Size: 6630 bytes --]

2014-09-03  Richard Biener  <rguenther@suse.de>

	* common.opt (flag_no_inline): Remove Init(0).
	* opts.c (default_options_table): Enable -finline at -O1+.
	(finish_options): Remove code forcefully disabling -finline
	and -Winline at -O0.
	* ipa-inline.c (ipa_inline): Remove !optimize guard.
	(pass_early_inline::execute): Check flag_no_inline instead
	of !optimize.
	(pass_ipa_inline::gate): New function, enabled when we compute
	inline summary.
	* ipa-inline-analysis (compute_inline_parameters): Check
	flag_no_inline instead of !optimize.
	(inline_analyze_function): Likewise.
	(inline_generate_summary): Likewise.

Index: gcc/common.opt
===================================================================
*** gcc/common.opt	(revision 214810)
--- gcc/common.opt	(working copy)
*************** Perform indirect inlining
*** 1380,1386 ****
  ; General flag to enable inlining.  Specifying -fno-inline will disable
  ; all inlining apart from always-inline functions.
  finline
! Common Report Var(flag_no_inline,0) Init(0) Optimization
  Enable inlining of function declared \"inline\", disabling disables all inlining
  
  finline-small-functions
--- 1380,1386 ----
  ; General flag to enable inlining.  Specifying -fno-inline will disable
  ; all inlining apart from always-inline functions.
  finline
! Common Report Var(flag_no_inline,0) Optimization
  Enable inlining of function declared \"inline\", disabling disables all inlining
  
  finline-small-functions
Index: gcc/opts.c
===================================================================
*** gcc/opts.c	(revision 214810)
--- gcc/opts.c	(working copy)
*************** maybe_default_options (struct gcc_option
*** 424,429 ****
--- 424,430 ----
  static const struct default_options default_options_table[] =
    {
      /* -O1 optimizations.  */
+     { OPT_LEVELS_1_PLUS, OPT_finline, NULL, 1 },
      { OPT_LEVELS_1_PLUS, OPT_fdefer_pop, NULL, 1 },
  #ifdef DELAY_SLOTS
      { OPT_LEVELS_1_PLUS, OPT_fdelayed_branch, NULL, 1 },
*************** finish_options (struct gcc_options *opts
*** 732,745 ****
        opts->x_flag_opts_finished = true;
      }
  
-   if (opts->x_optimize == 0)
-     {
-       /* Inlining does not work if not optimizing,
- 	 so force it not to be done.  */
-       opts->x_warn_inline = 0;
-       opts->x_flag_no_inline = 1;
-     }
- 
    /* The optimization to partition hot and cold basic blocks into separate
       sections of the .o and executable files does not work (currently)
       with exception handling.  This is because there is no support for
--- 739,744 ----
Index: gcc/ipa-inline.c
===================================================================
*** gcc/ipa-inline.c	(revision 214810)
--- gcc/ipa-inline.c	(working copy)
*************** ipa_inline (void)
*** 2137,2145 ****
    int cold;
    bool remove_functions = false;
  
-   if (!optimize)
-     return 0;
- 
    order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  
    if (in_lto_p && optimize)
--- 2137,2142 ----
*************** pass_early_inline::execute (function *fu
*** 2427,2434 ****
       functions.  */
    inlined = inline_always_inline_functions (node);
  
!   if (!optimize
!       || flag_no_inline
        || !flag_early_inlining
        /* Never inline regular functions into always-inline functions
  	 during incremental inlining.  This sucks as functions calling
--- 2424,2430 ----
       functions.  */
    inlined = inline_always_inline_functions (node);
  
!   if (flag_no_inline
        || !flag_early_inlining
        /* Never inline regular functions into always-inline functions
  	 during incremental inlining.  This sucks as functions calling
*************** public:
*** 2539,2544 ****
--- 2535,2542 ----
    {}
  
    /* opt_pass methods: */
+   virtual bool gate (function *) { return !(flag_no_inline
+ 					    && !flag_lto && !flag_wpa); }
    virtual unsigned int execute (function *) { return ipa_inline (); }
  
  }; // class pass_ipa_inline
Index: gcc/ipa-inline-analysis.c
===================================================================
*** gcc/ipa-inline-analysis.c	(revision 214810)
--- gcc/ipa-inline-analysis.c	(working copy)
*************** compute_inline_parameters (struct cgraph
*** 2849,2862 ****
    push_cfun (DECL_STRUCT_FUNCTION (node->decl));
  
    /* Estimate the stack size for the function if we're optimizing.  */
!   self_stack_size = optimize ? estimated_stack_frame_size (node) : 0;
    info->estimated_self_stack_size = self_stack_size;
    info->estimated_stack_size = self_stack_size;
    info->stack_frame_offset = 0;
  
    /* Can this function be inlined at all?  */
!   if (!optimize && !lookup_attribute ("always_inline",
! 				      DECL_ATTRIBUTES (node->decl)))
      info->inlinable = false;
    else
      info->inlinable = tree_inlinable_function_p (node->decl);
--- 2849,2862 ----
    push_cfun (DECL_STRUCT_FUNCTION (node->decl));
  
    /* Estimate the stack size for the function if we're optimizing.  */
!   self_stack_size = !flag_no_inline ? estimated_stack_frame_size (node) : 0;
    info->estimated_self_stack_size = self_stack_size;
    info->estimated_stack_size = self_stack_size;
    info->stack_frame_offset = 0;
  
    /* Can this function be inlined at all?  */
!   if (flag_no_inline && !lookup_attribute ("always_inline",
! 					   DECL_ATTRIBUTES (node->decl)))
      info->inlinable = false;
    else
      info->inlinable = tree_inlinable_function_p (node->decl);
*************** inline_analyze_function (struct cgraph_n
*** 3971,3977 ****
    if (optimize && !node->thunk.thunk_p)
      inline_indirect_intraprocedural_analysis (node);
    compute_inline_parameters (node, false);
!   if (!optimize)
      {
        struct cgraph_edge *e;
        for (e = node->callees; e; e = e->next_callee)
--- 3971,3977 ----
    if (optimize && !node->thunk.thunk_p)
      inline_indirect_intraprocedural_analysis (node);
    compute_inline_parameters (node, false);
!   if (flag_no_inline)
      {
        struct cgraph_edge *e;
        for (e = node->callees; e; e = e->next_callee)
*************** inline_generate_summary (void)
*** 4010,4016 ****
  
    /* When not optimizing, do not bother to analyze.  Inlining is still done
       because edge redirection needs to happen there.  */
!   if (!optimize && !flag_lto && !flag_wpa)
      return;
  
    function_insertion_hook_holder =
--- 4010,4016 ----
  
    /* When not optimizing, do not bother to analyze.  Inlining is still done
       because edge redirection needs to happen there.  */
!   if (flag_no_inline && !flag_lto && !flag_wpa)
      return;
  
    function_insertion_hook_holder =

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-02 16:52           ` Andi Kleen
  2014-09-03  9:50             ` Richard Biener
@ 2014-09-03  9:52             ` Richard Biener
  1 sibling, 0 replies; 22+ messages in thread
From: Richard Biener @ 2014-09-03  9:52 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Steven Bosscher, Andrew Pinski, Andi Kleen, GCC Patches, David Malcolm

On Tue, Sep 2, 2014 at 6:52 PM, Andi Kleen <ak@linux.intel.com> wrote:
>> Or we simply should make -finline work at -O0 (I suppose it might already
>> work?) and use it.
>
> Yes that's probably better. There are more hot inlines in the stage 1 profile
> (like wi::storage_ref or vec::length)
> I suspect with the ongoing C++'ification that will get worse.

Btw, it's C++ which I considered that -Og might replace -O0 exactly
because of all the abstraction penalty which usually doesn't help
debugging at -O0.  Also the idea was that -Og might even compile
faster than -O0, but that's far from true unfortunately ...

Richard.

> -Andi
>
> --
> ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-03  9:50             ` Richard Biener
@ 2014-09-04  3:58               ` Andi Kleen
  2014-09-04 13:02                 ` Richard Biener
  0 siblings, 1 reply; 22+ messages in thread
From: Andi Kleen @ 2014-09-04  3:58 UTC (permalink / raw)
  To: Richard Biener
  Cc: Andi Kleen, Steven Bosscher, Andrew Pinski, Andi Kleen,
	GCC Patches, David Malcolm, Jan Hubicka

> 
> Anyway, removing !optimize checks in favor of flag_no_inline checks
> and initializing that properly is a cleanup as well.

Patch looks good to me.
-Andi

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-04  3:58               ` Andi Kleen
@ 2014-09-04 13:02                 ` Richard Biener
  2014-09-04 18:52                   ` Jan Hubicka
  0 siblings, 1 reply; 22+ messages in thread
From: Richard Biener @ 2014-09-04 13:02 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andi Kleen, Steven Bosscher, Andrew Pinski, GCC Patches,
	David Malcolm, Jan Hubicka

On Thu, Sep 4, 2014 at 5:58 AM, Andi Kleen <andi@firstfloor.org> wrote:
>>
>> Anyway, removing !optimize checks in favor of flag_no_inline checks
>> and initializing that properly is a cleanup as well.
>
> Patch looks good to me.

Unfortunately it doesn't pass bootstrap (inline-summary re-use
between IPA passes is a maze...).  So I need to tweak it more
(pushed back for now).

Richard.

> -Andi

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Force rtl templates to be inlined
  2014-09-04 13:02                 ` Richard Biener
@ 2014-09-04 18:52                   ` Jan Hubicka
  0 siblings, 0 replies; 22+ messages in thread
From: Jan Hubicka @ 2014-09-04 18:52 UTC (permalink / raw)
  To: Richard Biener
  Cc: Andi Kleen, Andi Kleen, Steven Bosscher, Andrew Pinski,
	GCC Patches, David Malcolm, Jan Hubicka

> On Thu, Sep 4, 2014 at 5:58 AM, Andi Kleen <andi@firstfloor.org> wrote:
> >>
> >> Anyway, removing !optimize checks in favor of flag_no_inline checks
> >> and initializing that properly is a cleanup as well.
> >
> > Patch looks good to me.
> 
> Unfortunately it doesn't pass bootstrap (inline-summary re-use
> between IPA passes is a maze...).  So I need to tweak it more
> (pushed back for now).

From a quick glance it looks resonable thing to do.
I can fix the summary issues after returning from trip this Sunday.

Honza
> 
> Richard.
> 
> > -Andi

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN  (was Re: [PATCH] Force rtl templates to be inlined)
  2014-09-02 17:50   ` Andi Kleen
@ 2014-09-04 20:07     ` David Malcolm
  2014-09-04 20:46       ` Jakub Jelinek
  2014-09-05 18:45       ` Jeff Law
  0 siblings, 2 replies; 22+ messages in thread
From: David Malcolm @ 2014-09-04 20:07 UTC (permalink / raw)
  To: Andi Kleen; +Cc: gcc-patches, Andi Kleen

[-- Attachment #1: Type: text/plain, Size: 2358 bytes --]

On Tue, 2014-09-02 at 19:50 +0200, Andi Kleen wrote:
> > I suspect the bulk of them currently are coming from the safe_as_a
> > <rtx_insn *> calls within NEXT_INSN and PREV_INSN; do you happen to have
> > information handy on that?
> 
> Yes that's right:
> 
> -   1.03%  lto1                    [.] bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                     â–’
>    - bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                                                       â–’
>       - 92.20% bool is_a<rtx_insn*, rtx_def>(rtx_def*)                                                                                          â–’
>          - 98.53% rtx_insn* safe_as_a<rtx_insn*, rtx_def>(rtx_def*)                                                                             â–’
>             - 73.28% NEXT_INSN(rtx_insn const*)                                                                                                 â–’

The is_a_helper for rtx_insn * is non-trivial, so it may be worth
avoiding it, even when inlined.

The attached patch rewrites the inline NEXT_INSN/PREV_INSN to avoid
doing the safe_as_a, instead tightening up the interface so that one can
only set them to an insn, and introducing a new XINSN access macro and
corresponding rt_insn member of the union.

Bootstrapped on x86_64-unknown-linux-gnu (Fedora 20), and has been
rebuilt as part of a config-list.mk build for all working configurations
(albeit with other patches for the latter case).

OK for trunk?

gcc/
	* rtl.h (union rtunion): Add new member "rt_insn", of type
	rtx_insn *.
	(XINSN): New accessor macro, accessing as an rtx_insn *,
	requiring code "u".
	(PREV_INSN, NEXT_INSN): Eliminate the checked cast to rtx_insn *
	and instead directly use XINSN.
	(SET_PREV_INSN, SET_NEXT_INSN): Strengthen the return type from
	rtx & to rtx_insn *&, using XINSN internally.
	(NEXT_INSN): Eliminate the checked cast and instead directly use
	XINSN.

	* cfgrtl.c (fixup_abnormal_edges): Use NULL rather than NULL_RTX
	when assigning to SET_PREV_INSN/SET_NEXT_INSN.
	* haifa-sched.c (remove_notes): Likewise.
	* sel-sched-ir.c (sel_remove_insn): Likewise.
	(get_bb_note_from_pool): Likewise.
	* config/ia64/ia64.c (ia64_init_dfa_pre_cycle_insn): Likewise.
	(ia64_reorg): Likewise.



[-- Attachment #2: 0001-Add-XINSN-macro-and-use-it-within-NEXT_INSN-PREV_INS.patch --]
[-- Type: text/x-patch, Size: 5878 bytes --]

From 6e60e29211314b5865bc7b5b05d586777d96815f Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Wed, 3 Sep 2014 11:01:37 -0400
Subject: [PATCH 01/32] Add XINSN macro and use it within NEXT_INSN/PREV_INSN

gcc/
	* rtl.h (union rtunion): Add new member "rt_insn", of type
	rtx_insn *.
	(XINSN): New accessor macro, accessing as an rtx_insn *,
	requiring code "u".
	(PREV_INSN, NEXT_INSN): Eliminate the checked cast to rtx_insn *
	and instead directly use XINSN.
	(SET_PREV_INSN, SET_NEXT_INSN): Strengthen the return type from
	rtx & to rtx_insn *&, using XINSN internally.
	(NEXT_INSN): Eliminate the checked cast and instead directly use
	XINSN.

	* cfgrtl.c (fixup_abnormal_edges): Use NULL rather than NULL_RTX
	when assigning to SET_PREV_INSN/SET_NEXT_INSN.
	* haifa-sched.c (remove_notes): Likewise.
	* sel-sched-ir.c (sel_remove_insn): Likewise.
	(get_bb_note_from_pool): Likewise.
	* config/ia64/ia64.c (ia64_init_dfa_pre_cycle_insn): Likewise.
	(ia64_reorg): Likewise.
---
 gcc/cfgrtl.c           |  4 ++--
 gcc/config/ia64/ia64.c |  6 +++---
 gcc/haifa-sched.c      |  2 +-
 gcc/rtl.h              | 16 ++++++++--------
 gcc/sel-sched-ir.c     |  8 ++++----
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index bc6c965..7a03d78 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -3279,8 +3279,8 @@ fixup_abnormal_edges (void)
 			{
 			  /* We're not deleting it, we're moving it.  */
 			  INSN_DELETED_P (insn) = 0;
-			  SET_PREV_INSN (insn) = NULL_RTX;
-			  SET_NEXT_INSN (insn) = NULL_RTX;
+			  SET_PREV_INSN (insn) = NULL;
+			  SET_NEXT_INSN (insn) = NULL;
 
 			  insert_insn_on_edge (insn, e);
 			  inserted = true;
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 2ed5ddd..e73a489 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -9496,10 +9496,10 @@ ia64_init_dfa_pre_cycle_insn (void)
       prev_cycle_state = xmalloc (dfa_state_size);
     }
   dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
-  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
+  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL;
   recog_memoized (dfa_pre_cycle_insn);
   dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
-  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
+  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL;
   recog_memoized (dfa_stop_insn);
 }
 
@@ -9687,7 +9687,7 @@ ia64_reorg (void)
 
       initiate_bundle_states ();
       ia64_nop = make_insn_raw (gen_nop ());
-      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
+      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL;
       recog_memoized (ia64_nop);
       clocks_length = get_max_uid () + 1;
       stops_p = XCNEWVEC (char, clocks_length);
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 1ebfcdb..9ebe8f0 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -4069,7 +4069,7 @@ remove_notes (rtx_insn *head, rtx_insn *tail)
 
 	  /* Add the note to list that ends at NOTE_LIST.  */
 	  SET_PREV_INSN (insn) = note_list;
-	  SET_NEXT_INSN (insn) = NULL_RTX;
+	  SET_NEXT_INSN (insn) = NULL;
 	  if (note_list)
 	    SET_NEXT_INSN (note_list) = insn;
 	  note_list = insn;
diff --git a/gcc/rtl.h b/gcc/rtl.h
index beeed2f..27751db 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -195,6 +195,7 @@ union rtunion
   unsigned int rt_uint;
   const char *rt_str;
   rtx rt_rtx;
+  rtx_insn *rt_insn;
   rtvec rt_rtvec;
   enum machine_mode rt_type;
   addr_diff_vec_flags rt_addr_diff_vec_flags;
@@ -1208,6 +1209,7 @@ extern void rtl_check_failed_flag (const char *, const_rtx, const char *,
 #define XUINT(RTX, N)   (RTL_CHECK2 (RTX, N, 'i', 'n').rt_uint)
 #define XSTR(RTX, N)	(RTL_CHECK2 (RTX, N, 's', 'S').rt_str)
 #define XEXP(RTX, N)	(RTL_CHECK2 (RTX, N, 'e', 'u').rt_rtx)
+#define XINSN(RTX, N)	(RTL_CHECK1 (RTX, N, 'u').rt_insn)
 #define XVEC(RTX, N)	(RTL_CHECK2 (RTX, N, 'E', 'V').rt_rtvec)
 #define XMODE(RTX, N)	(RTL_CHECK1 (RTX, N, 'M').rt_type)
 #define XTREE(RTX, N)   (RTL_CHECK1 (RTX, N, 't').rt_tree)
@@ -1323,24 +1325,22 @@ inline int& INSN_UID (rtx insn)
 
 inline rtx_insn *PREV_INSN (const rtx_insn *insn)
 {
-  rtx prev = XEXP (insn, 0);
-  return safe_as_a <rtx_insn *> (prev);
+  return XINSN (insn, 0);
 }
 
-inline rtx& SET_PREV_INSN (rtx_insn *insn)
+inline rtx_insn *& SET_PREV_INSN (rtx_insn *insn)
 {
-  return XEXP (insn, 0);
+  return XINSN (insn, 0);
 }
 
 inline rtx_insn *NEXT_INSN (const rtx_insn *insn)
 {
-  rtx next = XEXP (insn, 1);
-  return safe_as_a <rtx_insn *> (next);
+  return XINSN (insn, 1);
 }
 
-inline rtx& SET_NEXT_INSN (rtx_insn *insn)
+inline rtx_insn *& SET_NEXT_INSN (rtx_insn *insn)
 {
-  return XEXP (insn, 1);
+  return XINSN (insn, 1);
 }
 
 inline basic_block BLOCK_FOR_INSN (const_rtx insn)
diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index 02dc8f2..b40b424 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -3948,8 +3948,8 @@ sel_remove_insn (insn_t insn, bool only_disconnect, bool full_tidying)
   /* It is necessary to NULL these fields in case we are going to re-insert
      INSN into the insns stream, as will usually happen in the ONLY_DISCONNECT
      case, but also for NOPs that we will return to the nop pool.  */
-  SET_PREV_INSN (insn) = NULL_RTX;
-  SET_NEXT_INSN (insn) = NULL_RTX;
+  SET_PREV_INSN (insn) = NULL;
+  SET_NEXT_INSN (insn) = NULL;
   set_block_for_insn (insn, NULL);
 
   return tidy_control_flow (bb, full_tidying);
@@ -4991,8 +4991,8 @@ get_bb_note_from_pool (void)
     {
       rtx_note *note = bb_note_pool.pop ();
 
-      SET_PREV_INSN (note) = NULL_RTX;
-      SET_NEXT_INSN (note) = NULL_RTX;
+      SET_PREV_INSN (note) = NULL;
+      SET_NEXT_INSN (note) = NULL;
 
       return note;
     }
-- 
1.8.5.3


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN  (was Re: [PATCH] Force rtl templates to be inlined)
  2014-09-04 20:07     ` [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN (was Re: [PATCH] Force rtl templates to be inlined) David Malcolm
@ 2014-09-04 20:46       ` Jakub Jelinek
  2014-09-04 21:23         ` David Malcolm
  2014-09-05 18:32         ` Jeff Law
  2014-09-05 18:45       ` Jeff Law
  1 sibling, 2 replies; 22+ messages in thread
From: Jakub Jelinek @ 2014-09-04 20:46 UTC (permalink / raw)
  To: David Malcolm; +Cc: Andi Kleen, gcc-patches, Andi Kleen

On Thu, Sep 04, 2014 at 04:04:17PM -0400, David Malcolm wrote:
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -195,6 +195,7 @@ union rtunion
>    unsigned int rt_uint;
>    const char *rt_str;
>    rtx rt_rtx;
> +  rtx_insn *rt_insn;
>    rtvec rt_rtvec;
>    enum machine_mode rt_type;
>    addr_diff_vec_flags rt_addr_diff_vec_flags;
> @@ -1208,6 +1209,7 @@ extern void rtl_check_failed_flag (const char *, const_rtx, const char *,
>  #define XUINT(RTX, N)   (RTL_CHECK2 (RTX, N, 'i', 'n').rt_uint)
>  #define XSTR(RTX, N)	(RTL_CHECK2 (RTX, N, 's', 'S').rt_str)
>  #define XEXP(RTX, N)	(RTL_CHECK2 (RTX, N, 'e', 'u').rt_rtx)

Shouldn't XEXP be changed into RTL_CHECK1 (RTX, N, 'e').rt_rtx
then and the accessors of first operand of INSN_LIST and only operand of
LABEL_REF be changed to XINSN too?  Of course doesn't have to be done
immediately, can be done as a followup.

> +#define XINSN(RTX, N)	(RTL_CHECK1 (RTX, N, 'u').rt_insn)
>  #define XVEC(RTX, N)	(RTL_CHECK2 (RTX, N, 'E', 'V').rt_rtvec)
>  #define XMODE(RTX, N)	(RTL_CHECK1 (RTX, N, 'M').rt_type)
>  #define XTREE(RTX, N)   (RTL_CHECK1 (RTX, N, 't').rt_tree)

	Jakub

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN  (was Re: [PATCH] Force rtl templates to be inlined)
  2014-09-04 20:46       ` Jakub Jelinek
@ 2014-09-04 21:23         ` David Malcolm
  2014-09-05 18:41           ` Jeff Law
  2014-09-05 18:32         ` Jeff Law
  1 sibling, 1 reply; 22+ messages in thread
From: David Malcolm @ 2014-09-04 21:23 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Andi Kleen, gcc-patches, Andi Kleen

On Thu, 2014-09-04 at 22:22 +0200, Jakub Jelinek wrote:
> On Thu, Sep 04, 2014 at 04:04:17PM -0400, David Malcolm wrote:
> > --- a/gcc/rtl.h
> > +++ b/gcc/rtl.h
> > @@ -195,6 +195,7 @@ union rtunion
> >    unsigned int rt_uint;
> >    const char *rt_str;
> >    rtx rt_rtx;
> > +  rtx_insn *rt_insn;
> >    rtvec rt_rtvec;
> >    enum machine_mode rt_type;
> >    addr_diff_vec_flags rt_addr_diff_vec_flags;
> > @@ -1208,6 +1209,7 @@ extern void rtl_check_failed_flag (const char *, const_rtx, const char *,
> >  #define XUINT(RTX, N)   (RTL_CHECK2 (RTX, N, 'i', 'n').rt_uint)
> >  #define XSTR(RTX, N)	(RTL_CHECK2 (RTX, N, 's', 'S').rt_str)
> >  #define XEXP(RTX, N)	(RTL_CHECK2 (RTX, N, 'e', 'u').rt_rtx)
> 
> Shouldn't XEXP be changed into RTL_CHECK1 (RTX, N, 'e').rt_rtx
> then and the accessors of first operand of INSN_LIST and only operand of
> LABEL_REF be changed to XINSN too?  Of course doesn't have to be done
> immediately, can be done as a followup.

I suppose so, but I don't see an easy way of locating such XEXP users
beyond building with ENABLE_RTL_CHECKING, and checking on every
configuration, and trying to exercise all code paths.  Can we consider
that a *long-term* followup?

Use of XINSN does introduce a hole in the rtx-classes type-safety
scheme, in that it's only checked with ENABLE_RTL_CHECKING.  I
considered it acceptable in the given patch since the
SET_NEXT_INSN/SET_PREV_INSN lvalue accessors return an rtx_insn *& and
thus will only accept an rtx_insn * as an rvalue - but I'm a bit nervous
about its use elsewhere in the code; I'd rather we used rtx_insn_list
for homogeneous lists of INSN_LIST nodes, and maybe an rtx_label_ref
class for LABEL_REF, and perhaps an rtx_reg_note class for heterogeneous
lists of INSN_LIST/EXPR_LIST/INT_LIST nodes for REG_NOTES.

> > +#define XINSN(RTX, N)	(RTL_CHECK1 (RTX, N, 'u').rt_insn)
> >  #define XVEC(RTX, N)	(RTL_CHECK2 (RTX, N, 'E', 'V').rt_rtvec)
> >  #define XMODE(RTX, N)	(RTL_CHECK1 (RTX, N, 'M').rt_type)
> >  #define XTREE(RTX, N)   (RTL_CHECK1 (RTX, N, 't').rt_tree)
> 
> 	Jakub

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN  (was Re: [PATCH] Force rtl templates to be inlined)
  2014-09-04 20:46       ` Jakub Jelinek
  2014-09-04 21:23         ` David Malcolm
@ 2014-09-05 18:32         ` Jeff Law
  1 sibling, 0 replies; 22+ messages in thread
From: Jeff Law @ 2014-09-05 18:32 UTC (permalink / raw)
  To: Jakub Jelinek, David Malcolm; +Cc: Andi Kleen, gcc-patches, Andi Kleen

On 09/04/14 14:22, Jakub Jelinek wrote:
> On Thu, Sep 04, 2014 at 04:04:17PM -0400, David Malcolm wrote:
>> --- a/gcc/rtl.h
>> +++ b/gcc/rtl.h
>> @@ -195,6 +195,7 @@ union rtunion
>>     unsigned int rt_uint;
>>     const char *rt_str;
>>     rtx rt_rtx;
>> +  rtx_insn *rt_insn;
>>     rtvec rt_rtvec;
>>     enum machine_mode rt_type;
>>     addr_diff_vec_flags rt_addr_diff_vec_flags;
>> @@ -1208,6 +1209,7 @@ extern void rtl_check_failed_flag (const char *, const_rtx, const char *,
>>   #define XUINT(RTX, N)   (RTL_CHECK2 (RTX, N, 'i', 'n').rt_uint)
>>   #define XSTR(RTX, N)	(RTL_CHECK2 (RTX, N, 's', 'S').rt_str)
>>   #define XEXP(RTX, N)	(RTL_CHECK2 (RTX, N, 'e', 'u').rt_rtx)
>
> Shouldn't XEXP be changed into RTL_CHECK1 (RTX, N, 'e').rt_rtx
> then and the accessors of first operand of INSN_LIST and only operand of
> LABEL_REF be changed to XINSN too?  Of course doesn't have to be done
> immediately, can be done as a followup.
Yea, let's chase this as a follow-up.

Jeff

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN  (was Re: [PATCH] Force rtl templates to be inlined)
  2014-09-04 21:23         ` David Malcolm
@ 2014-09-05 18:41           ` Jeff Law
  0 siblings, 0 replies; 22+ messages in thread
From: Jeff Law @ 2014-09-05 18:41 UTC (permalink / raw)
  To: David Malcolm, Jakub Jelinek; +Cc: Andi Kleen, gcc-patches, Andi Kleen

On 09/04/14 15:19, David Malcolm wrote:
>
> I suppose so, but I don't see an easy way of locating such XEXP users
> beyond building with ENABLE_RTL_CHECKING, and checking on every
> configuration, and trying to exercise all code paths.  Can we consider
> that a *long-term* followup?
Well, XEXP isn't supposed to be used to access integer fields (we have 
XINT for that).  Any code doing so is already broken, IMHO.  And 
anything doing so is going to have some ugly cast from an rtx to some 
integer type -- a strong hint that the original author any any reviewer 
missed something :-)

So I'd be comfortable with a more limited set of testing (say bootstrap 
x86_64 and perhaps one or two other platforms).  Evaluate anything 
found, and assuming we're not finding anything terribly unexpected, go 
for it.


>
> Use of XINSN does introduce a hole in the rtx-classes type-safety
> scheme, in that it's only checked with ENABLE_RTL_CHECKING.
I see XINSN as a relatively short term wart.  Our goal ought to be to 
make it go away, but I can see how we need it in the immediate term.


Jeff

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN  (was Re: [PATCH] Force rtl templates to be inlined)
  2014-09-04 20:07     ` [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN (was Re: [PATCH] Force rtl templates to be inlined) David Malcolm
  2014-09-04 20:46       ` Jakub Jelinek
@ 2014-09-05 18:45       ` Jeff Law
  2014-09-05 19:05         ` David Malcolm
  1 sibling, 1 reply; 22+ messages in thread
From: Jeff Law @ 2014-09-05 18:45 UTC (permalink / raw)
  To: David Malcolm, Andi Kleen; +Cc: gcc-patches, Andi Kleen

On 09/04/14 14:04, David Malcolm wrote:
> On Tue, 2014-09-02 at 19:50 +0200, Andi Kleen wrote:
>>> I suspect the bulk of them currently are coming from the safe_as_a
>>> <rtx_insn *> calls within NEXT_INSN and PREV_INSN; do you happen to have
>>> information handy on that?
>>
>> Yes that's right:
>>
>> -   1.03%  lto1                    [.] bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                     â–’
>>     - bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                                                       â–’
>>        - 92.20% bool is_a<rtx_insn*, rtx_def>(rtx_def*)                                                                                          â–’
>>           - 98.53% rtx_insn* safe_as_a<rtx_insn*, rtx_def>(rtx_def*)                                                                             â–’
>>              - 73.28% NEXT_INSN(rtx_insn const*)                                                                                                 â–’
>
> The is_a_helper for rtx_insn * is non-trivial, so it may be worth
> avoiding it, even when inlined.
>
> The attached patch rewrites the inline NEXT_INSN/PREV_INSN to avoid
> doing the safe_as_a, instead tightening up the interface so that one can
> only set them to an insn, and introducing a new XINSN access macro and
> corresponding rt_insn member of the union.
>
> Bootstrapped on x86_64-unknown-linux-gnu (Fedora 20), and has been
> rebuilt as part of a config-list.mk build for all working configurations
> (albeit with other patches for the latter case).
>
> OK for trunk?
So is this just to deal with the overhead in the safe_as_a helper until 
we can strengthen more code?  And is that overhead significant in an 
optimized build?

Would an alternate approach be to make the checking in safe_as_a 
conditionalized on ENABLE_CHECKING?

jeff

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN  (was Re: [PATCH] Force rtl templates to be inlined)
  2014-09-05 18:45       ` Jeff Law
@ 2014-09-05 19:05         ` David Malcolm
  0 siblings, 0 replies; 22+ messages in thread
From: David Malcolm @ 2014-09-05 19:05 UTC (permalink / raw)
  To: Jeff Law; +Cc: Andi Kleen, gcc-patches, Andi Kleen

[-- Attachment #1: Type: text/plain, Size: 2235 bytes --]

On Fri, 2014-09-05 at 12:45 -0600, Jeff Law wrote:
> On 09/04/14 14:04, David Malcolm wrote:
> > On Tue, 2014-09-02 at 19:50 +0200, Andi Kleen wrote:
> >>> I suspect the bulk of them currently are coming from the safe_as_a
> >>> <rtx_insn *> calls within NEXT_INSN and PREV_INSN; do you happen to have
> >>> information handy on that?
> >>
> >> Yes that's right:
> >>
> >> -   1.03%  lto1                    [.] bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                     â–’
> >>     - bool is_a_helper<rtx_insn*>::test<rtx_def>(rtx_def*)                                                                                       â–’
> >>        - 92.20% bool is_a<rtx_insn*, rtx_def>(rtx_def*)                                                                                          â–’
> >>           - 98.53% rtx_insn* safe_as_a<rtx_insn*, rtx_def>(rtx_def*)                                                                             â–’
> >>              - 73.28% NEXT_INSN(rtx_insn const*)                                                                                                 â–’
> >
> > The is_a_helper for rtx_insn * is non-trivial, so it may be worth
> > avoiding it, even when inlined.
> >
> > The attached patch rewrites the inline NEXT_INSN/PREV_INSN to avoid
> > doing the safe_as_a, instead tightening up the interface so that one can
> > only set them to an insn, and introducing a new XINSN access macro and
> > corresponding rt_insn member of the union.
> >
> > Bootstrapped on x86_64-unknown-linux-gnu (Fedora 20), and has been
> > rebuilt as part of a config-list.mk build for all working configurations
> > (albeit with other patches for the latter case).
> >
> > OK for trunk?
> So is this just to deal with the overhead in the safe_as_a helper until 
> we can strengthen more code?  And is that overhead significant in an 
> optimized build?
> 
> Would an alternate approach be to make the checking in safe_as_a 
> conditionalized on ENABLE_CHECKING?

I wrote the attached patch to do that, but I don't yet have numbers to
back it up.

Bootstrapped with current settings, and smoketested with
--enable-checking=release (both on Fedora 20 x86_64).

[-- Attachment #2: 0001-Ensure-that-safe_as_a-can-have-no-performance-overhe.patch --]
[-- Type: text/x-patch, Size: 960 bytes --]

From 24b98616c4425a8ef380d2d5fef00f82af9df985 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Fri, 29 Aug 2014 17:04:49 -0400
Subject: [PATCH] Ensure that safe_as_a can have no performance overhead in a
 release build

gcc/
	* is-a.h (safe_as_a): Eliminate the conditional from the
	!defined(ENABLE_CHECKING) case, to ensure there can be no
	performance degradation of the release build.
---
 gcc/is-a.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/gcc/is-a.h b/gcc/is-a.h
index 176066b..f0c05b0 100644
--- a/gcc/is-a.h
+++ b/gcc/is-a.h
@@ -200,6 +200,7 @@ template <typename T, typename U>
 inline T
 safe_as_a (U *p)
 {
+#ifdef ENABLE_CHECKING
   if (p)
     {
       gcc_checking_assert (is_a <T> (p));
@@ -207,6 +208,9 @@ safe_as_a (U *p)
     }
   else
     return NULL;
+#else
+  return is_a_helper <T>::cast (p);
+#endif
 }
 
 /* A generic checked conversion from a base type U to a derived type T.  See
-- 
1.8.5.3


^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2014-09-05 19:05 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-02  7:05 [PATCH] Force rtl templates to be inlined Andi Kleen
2014-09-02  7:09 ` Andrew Pinski
2014-09-02  7:20   ` Andi Kleen
2014-09-02  7:22     ` Andrew Pinski
2014-09-02  8:37       ` Steven Bosscher
2014-09-02  8:42         ` pinskia
2014-09-02  8:43         ` Richard Biener
2014-09-02 16:52           ` Andi Kleen
2014-09-03  9:50             ` Richard Biener
2014-09-04  3:58               ` Andi Kleen
2014-09-04 13:02                 ` Richard Biener
2014-09-04 18:52                   ` Jan Hubicka
2014-09-03  9:52             ` Richard Biener
2014-09-02 14:59 ` David Malcolm
2014-09-02 17:50   ` Andi Kleen
2014-09-04 20:07     ` [PATCH] Add XINSN macro and use it within NEXT_INSN/PREV_INSN (was Re: [PATCH] Force rtl templates to be inlined) David Malcolm
2014-09-04 20:46       ` Jakub Jelinek
2014-09-04 21:23         ` David Malcolm
2014-09-05 18:41           ` Jeff Law
2014-09-05 18:32         ` Jeff Law
2014-09-05 18:45       ` Jeff Law
2014-09-05 19:05         ` David Malcolm

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).