From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 11567 invoked by alias); 2 Sep 2014 07:09:32 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 11550 invoked by uid 89); 2 Sep 2014 07:09:30 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.4 required=5.0 tests=AWL,BAYES_00,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-lb0-f179.google.com Received: from mail-lb0-f179.google.com (HELO mail-lb0-f179.google.com) (209.85.217.179) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Tue, 02 Sep 2014 07:09:28 +0000 Received: by mail-lb0-f179.google.com with SMTP id l4so6880967lbv.24 for ; Tue, 02 Sep 2014 00:09:24 -0700 (PDT) MIME-Version: 1.0 X-Received: by 10.112.60.33 with SMTP id e1mr30544985lbr.36.1409641764586; Tue, 02 Sep 2014 00:09:24 -0700 (PDT) Received: by 10.25.23.204 with HTTP; Tue, 2 Sep 2014 00:09:24 -0700 (PDT) In-Reply-To: <1409641427-29875-1-git-send-email-andi@firstfloor.org> References: <1409641427-29875-1-git-send-email-andi@firstfloor.org> Date: Tue, 02 Sep 2014 07:09:00 -0000 Message-ID: Subject: Re: [PATCH] Force rtl templates to be inlined From: Andrew Pinski To: Andi Kleen Cc: GCC Patches , Andi Kleen , David Malcolm Content-Type: text/plain; charset=UTF-8 X-IsSubscribed: yes X-SW-Source: 2014-09/txt/msg00098.txt.bz2 On Tue, Sep 2, 2014 at 12:03 AM, Andi Kleen wrote: > From: Andi Kleen > > I noticed that with the trunk compiler a range of the new rtl > inlines show up as hot in a profiler during stage1. I think > that happens because stage1 is not using optimization > and does not inline plain "inline". And these rtl inlines > are very frequently called. > > Mark them all with __attribute__((always_inline)) which forces > inlining even with -O0. 
I think this is wrong and should not be committed. stage1 is designed to be without optimization and there have been bugs in the past in the area of always_inline too. Thanks, Andrew Pinski > > Passes bootstrap and testing on x86_64-linux. > > Cc: dmalcolm@redhat.com > > include/: > > 2014-09-01 Andi Kleen > > * ansidecl.h (ALWAYS_INLINE): Add. > > gcc/: > > 2014-09-01 Andi Kleen > > * rtl.h (is_a_helper): Change inline to ALWAYS_INLINE. > (rhs_regno): Ditto. > (init_costs_to_max): Ditto. > (init_costs_to_zero): Ditto. > (costs_lt_p): Ditto. > (costs_add_n_insns): Ditto. > (wi::int_traits ::get_precision): Ditto. > (wi::shwi): Ditto. > (wi::min_value): Ditto. > (wi::max_value): Ditto. > (set_rtx_cost): Ditto. > (get_full_set_rtx_cost): Ditto. > (set_src_cost): Ditto. > (get_full_set_src_cost): Ditto. > (get_mem_attrs): Ditto. > --- > gcc/rtl.h | 111 +++++++++++++++++++++++++++-------------------------- > include/ansidecl.h | 6 +++ > 2 files changed, 62 insertions(+), 55 deletions(-) > > diff --git a/gcc/rtl.h b/gcc/rtl.h > index beeed2f..d711e43 100644 > --- a/gcc/rtl.h > +++ b/gcc/rtl.h > @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. 
If not see > #define GCC_RTL_H > > #include > +#include "ansidecl.h" > #include "statistics.h" > #include "machmode.h" > #include "input.h" > @@ -418,7 +419,7 @@ public: > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return rt->code == EXPR_LIST; > @@ -447,7 +448,7 @@ public: > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return rt->code == INSN_LIST; > @@ -474,7 +475,7 @@ public: > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return rt->code == SEQUENCE; > @@ -482,7 +483,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (const_rtx rt) > { > return rt->code == SEQUENCE; > @@ -778,7 +779,7 @@ struct GTY(()) rtvec_def { > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return (INSN_P (rt) > @@ -790,7 +791,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (const_rtx rt) > { > return (INSN_P (rt) > @@ -802,7 +803,7 @@ is_a_helper ::test (const_rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return DEBUG_INSN_P (rt); > @@ -810,7 +811,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return NONJUMP_INSN_P (rt); > @@ -818,7 +819,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return JUMP_P (rt); > @@ -826,7 +827,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return CALL_P (rt); > @@ -834,7 +835,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > 
is_a_helper ::test (rtx_insn *insn) > { > return CALL_P (insn); > @@ -842,7 +843,7 @@ is_a_helper ::test (rtx_insn *insn) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return JUMP_TABLE_DATA_P (rt); > @@ -850,7 +851,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx_insn *insn) > { > return JUMP_TABLE_DATA_P (insn); > @@ -858,7 +859,7 @@ is_a_helper ::test (rtx_insn *insn) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return BARRIER_P (rt); > @@ -866,7 +867,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return LABEL_P (rt); > @@ -874,7 +875,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx_insn *insn) > { > return LABEL_P (insn); > @@ -882,7 +883,7 @@ is_a_helper ::test (rtx_insn *insn) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx rt) > { > return NOTE_P (rt); > @@ -890,7 +891,7 @@ is_a_helper ::test (rtx rt) > > template <> > template <> > -inline bool > +ALWAYS_INLINE bool > is_a_helper ::test (rtx_insn *insn) > { > return NOTE_P (insn); > @@ -1257,26 +1258,26 @@ extern void rtl_check_failed_flag (const char *, const_rtx, const char *, > > /* Methods of rtx_expr_list. */ > > -inline rtx_expr_list *rtx_expr_list::next () const > +ALWAYS_INLINE rtx_expr_list *rtx_expr_list::next () const > { > rtx tmp = XEXP (this, 1); > return safe_as_a (tmp); > } > > -inline rtx rtx_expr_list::element () const > +ALWAYS_INLINE rtx rtx_expr_list::element () const > { > return XEXP (this, 0); > } > > /* Methods of rtx_insn_list. 
*/ > > -inline rtx_insn_list *rtx_insn_list::next () const > +ALWAYS_INLINE rtx_insn_list *rtx_insn_list::next () const > { > rtx tmp = XEXP (this, 1); > return safe_as_a (tmp); > } > > -inline rtx_insn *rtx_insn_list::insn () const > +ALWAYS_INLINE rtx_insn *rtx_insn_list::insn () const > { > rtx tmp = XEXP (this, 0); > return safe_as_a (tmp); > @@ -1284,17 +1285,17 @@ inline rtx_insn *rtx_insn_list::insn () const > > /* Methods of rtx_sequence. */ > > -inline int rtx_sequence::len () const > +ALWAYS_INLINE int rtx_sequence::len () const > { > return XVECLEN (this, 0); > } > > -inline rtx rtx_sequence::element (int index) const > +ALWAYS_INLINE rtx rtx_sequence::element (int index) const > { > return XVECEXP (this, 0, index); > } > > -inline rtx_insn *rtx_sequence::insn (int index) const > +ALWAYS_INLINE rtx_insn *rtx_sequence::insn (int index) const > { > return as_a (XVECEXP (this, 0, index)); > } > @@ -1303,12 +1304,12 @@ inline rtx_insn *rtx_sequence::insn (int index) const > > /* Holds a unique number for each insn. > These are not necessarily sequentially increasing. */ > -inline int INSN_UID (const_rtx insn) > +ALWAYS_INLINE int INSN_UID (const_rtx insn) > { > return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID", > (insn))->u2.insn_uid; > } > -inline int& INSN_UID (rtx insn) > +ALWAYS_INLINE int& INSN_UID (rtx insn) > { > return RTL_INSN_CHAIN_FLAG_CHECK ("INSN_UID", > (insn))->u2.insn_uid; > @@ -1321,60 +1322,60 @@ inline int& INSN_UID (rtx insn) > and an lvalue form: > SET_NEXT_INSN/SET_PREV_INSN. 
*/ > > -inline rtx_insn *PREV_INSN (const rtx_insn *insn) > +ALWAYS_INLINE rtx_insn *PREV_INSN (const rtx_insn *insn) > { > rtx prev = XEXP (insn, 0); > return safe_as_a (prev); > } > > -inline rtx& SET_PREV_INSN (rtx_insn *insn) > +ALWAYS_INLINE rtx& SET_PREV_INSN (rtx_insn *insn) > { > return XEXP (insn, 0); > } > > -inline rtx_insn *NEXT_INSN (const rtx_insn *insn) > +ALWAYS_INLINE rtx_insn *NEXT_INSN (const rtx_insn *insn) > { > rtx next = XEXP (insn, 1); > return safe_as_a (next); > } > > -inline rtx& SET_NEXT_INSN (rtx_insn *insn) > +ALWAYS_INLINE rtx& SET_NEXT_INSN (rtx_insn *insn) > { > return XEXP (insn, 1); > } > > -inline basic_block BLOCK_FOR_INSN (const_rtx insn) > +ALWAYS_INLINE basic_block BLOCK_FOR_INSN (const_rtx insn) > { > return XBBDEF (insn, 2); > } > > -inline basic_block& BLOCK_FOR_INSN (rtx insn) > +ALWAYS_INLINE basic_block& BLOCK_FOR_INSN (rtx insn) > { > return XBBDEF (insn, 2); > } > > /* The body of an insn. */ > -inline rtx PATTERN (const_rtx insn) > +ALWAYS_INLINE rtx PATTERN (const_rtx insn) > { > return XEXP (insn, 3); > } > > -inline rtx& PATTERN (rtx insn) > +ALWAYS_INLINE rtx& PATTERN (rtx insn) > { > return XEXP (insn, 3); > } > > -inline unsigned int INSN_LOCATION (const_rtx insn) > +ALWAYS_INLINE unsigned int INSN_LOCATION (const_rtx insn) > { > return XUINT (insn, 4); > } > > -inline unsigned int& INSN_LOCATION (rtx insn) > +ALWAYS_INLINE unsigned int& INSN_LOCATION (rtx insn) > { > return XUINT (insn, 4); > } > > -inline bool INSN_HAS_LOCATION (const rtx_insn *insn) > +ALWAYS_INLINE bool INSN_HAS_LOCATION (const rtx_insn *insn) > { > return LOCATION_LOCUS (INSN_LOCATION (insn)) != UNKNOWN_LOCATION; > } > @@ -1387,7 +1388,7 @@ inline bool INSN_HAS_LOCATION (const rtx_insn *insn) > -1 means this instruction has not been recognized yet. 
*/ > #define INSN_CODE(INSN) XINT (INSN, 5) > > -inline rtvec rtx_jump_table_data::get_labels () const > +ALWAYS_INLINE rtvec rtx_jump_table_data::get_labels () const > { > rtx pat = PATTERN (this); > if (GET_CODE (pat) == ADDR_VEC) > @@ -1658,7 +1659,7 @@ enum label_kind > be decremented and possibly the label can be deleted. */ > #define JUMP_LABEL(INSN) XCEXP (INSN, 7, JUMP_INSN) > > -inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn) > +ALWAYS_INLINE rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn) > { > return safe_as_a (JUMP_LABEL (insn)); > } > @@ -1682,7 +1683,7 @@ inline rtx_insn *JUMP_LABEL_AS_INSN (const rtx_insn *insn) > (RTL_FLAG_CHECK1 ("ORIGINAL_REGNO", (RTX), REG)->u2.original_regno) > > /* Force the REGNO macro to only be used on the lhs. */ > -static inline unsigned int > +static ALWAYS_INLINE unsigned int > rhs_regno (const_rtx x) > { > return XCUINT (x, 0, REG); > @@ -1774,7 +1775,7 @@ struct full_rtx_costs > }; > > /* Initialize a full_rtx_costs structure C to the maximum cost. */ > -static inline void > +static ALWAYS_INLINE void > init_costs_to_max (struct full_rtx_costs *c) > { > c->speed = MAX_COST; > @@ -1782,7 +1783,7 @@ init_costs_to_max (struct full_rtx_costs *c) > } > > /* Initialize a full_rtx_costs structure C to zero cost. */ > -static inline void > +static ALWAYS_INLINE void > init_costs_to_zero (struct full_rtx_costs *c) > { > c->speed = 0; > @@ -1791,7 +1792,7 @@ init_costs_to_zero (struct full_rtx_costs *c) > > /* Compare two full_rtx_costs structures A and B, returning true > if A < B when optimizing for speed. */ > -static inline bool > +static ALWAYS_INLINE bool > costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b, > bool speed) > { > @@ -1805,7 +1806,7 @@ costs_lt_p (struct full_rtx_costs *a, struct full_rtx_costs *b, > > /* Increase both members of the full_rtx_costs structure C by the > cost of N insns. 
*/ > -static inline void > +static ALWAYS_INLINE void > costs_add_n_insns (struct full_rtx_costs *c, int n) > { > c->speed += COSTS_N_INSNS (n); > @@ -1904,13 +1905,13 @@ namespace wi > }; > } > > -inline unsigned int > +ALWAYS_INLINE unsigned int > wi::int_traits ::get_precision (const rtx_mode_t &x) > { > return GET_MODE_PRECISION (x.second); > } > > -inline wi::storage_ref > +ALWAYS_INLINE wi::storage_ref > wi::int_traits ::decompose (HOST_WIDE_INT *, > unsigned int precision, > const rtx_mode_t &x) > @@ -1949,7 +1950,7 @@ namespace wi > wide_int max_value (enum machine_mode, signop); > } > > -inline wi::hwi_with_prec > +ALWAYS_INLINE wi::hwi_with_prec > wi::shwi (HOST_WIDE_INT val, enum machine_mode mode) > { > return shwi (val, GET_MODE_PRECISION (mode)); > @@ -1957,7 +1958,7 @@ wi::shwi (HOST_WIDE_INT val, enum machine_mode mode) > > /* Produce the smallest number that is represented in MODE. The precision > is taken from MODE and the sign from SGN. */ > -inline wide_int > +ALWAYS_INLINE wide_int > wi::min_value (enum machine_mode mode, signop sgn) > { > return min_value (GET_MODE_PRECISION (mode), sgn); > @@ -1965,7 +1966,7 @@ wi::min_value (enum machine_mode mode, signop sgn) > > /* Produce the largest number that is represented in MODE. The precision > is taken from MODE and the sign from SGN. */ > -inline wide_int > +ALWAYS_INLINE wide_int > wi::max_value (enum machine_mode mode, signop sgn) > { > return max_value (GET_MODE_PRECISION (mode), sgn); > @@ -2007,7 +2008,7 @@ extern enum rtx_code get_index_code (const struct address_info *); > /* Return the cost of SET X. SPEED_P is true if optimizing for speed > rather than size. */ > > -static inline int > +static ALWAYS_INLINE int > set_rtx_cost (rtx x, bool speed_p) > { > return rtx_cost (x, INSN, 4, speed_p); > @@ -2015,7 +2016,7 @@ set_rtx_cost (rtx x, bool speed_p) > > /* Like set_rtx_cost, but return both the speed and size costs in C. 
*/ > > -static inline void > +static ALWAYS_INLINE void > get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c) > { > get_full_rtx_cost (x, INSN, 4, c); > @@ -2025,7 +2026,7 @@ get_full_set_rtx_cost (rtx x, struct full_rtx_costs *c) > of a register move. SPEED_P is true if optimizing for speed rather > than size. */ > > -static inline int > +static ALWAYS_INLINE int > set_src_cost (rtx x, bool speed_p) > { > return rtx_cost (x, SET, 1, speed_p); > @@ -2033,7 +2034,7 @@ set_src_cost (rtx x, bool speed_p) > > /* Like set_src_cost, but return both the speed and size costs in C. */ > > -static inline void > +static ALWAYS_INLINE void > get_full_set_src_cost (rtx x, struct full_rtx_costs *c) > { > get_full_rtx_cost (x, SET, 1, c); > @@ -3055,7 +3056,7 @@ extern struct target_rtl *this_target_rtl; > > #ifndef GENERATOR_FILE > /* Return the attributes of a MEM rtx. */ > -static inline struct mem_attrs * > +static ALWAYS_INLINE struct mem_attrs * > get_mem_attrs (const_rtx x) > { > struct mem_attrs *attrs; > diff --git a/include/ansidecl.h b/include/ansidecl.h > index 0fb23bb..9132ee0 100644 > --- a/include/ansidecl.h > +++ b/include/ansidecl.h > @@ -306,6 +306,12 @@ So instead we use the macro below and test it against specific values. */ > #define ENUM_BITFIELD(TYPE) unsigned int > #endif > > +#ifdef __GNUC__ > +#define ALWAYS_INLINE __attribute__ ((always_inline)) inline > +#else > +#define ALWAYS_INLINE inline > +#endif > + > #ifdef __cplusplus > } > #endif > -- > 2.1.0 >