From: Richard Sandiford
To: Steve Ellcey
Cc: sellcey@cavium.com, gcc-patches@gcc.gnu.org
Subject: Re: [Patch 4/4][Aarch64] v2: Implement Aarch64 SIMD ABI
Date: Wed, 09 Jan 2019 10:01:00 -0000
References: <1541699749.12016.9.camel@cavium.com> <87tvjq3kqs.fsf@arm.com>
 <875zv0l64x.fsf@arm.com> <7bdc455d8d0b38778e151ad8ce8cfe6b69c580c5.camel@marvell.com>
In-Reply-To: <7bdc455d8d0b38778e151ad8ce8cfe6b69c580c5.camel@marvell.com>
 (Steve Ellcey's message of "Tue, 8 Jan 2019 23:42:37 +0000")
Message-ID: <87d0p6gng4.fsf@arm.com>

Steve Ellcey writes:
>  /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The callee only saves
>     the lower 64 bits of a 128-bit register.  Tell the compiler the callee
>     clobbers the top 64 bits when restoring the bottom 64 bits.  */
>
>  static bool
> -aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
> +aarch64_hard_regno_call_part_clobbered (rtx_insn *insn, unsigned int regno,
> +                                        machine_mode mode)
>  {
> -  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
> +  bool simd_p = insn && CALL_P (insn) && aarch64_simd_call_p (insn);
> +  return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), simd_p ? 16: 8);

Should be a space before the ":" (which pushes the line over 80 chars).

> diff --git a/gcc/hooks.h b/gcc/hooks.h
> index 9e4bc29..dc6b2e1 100644
> --- a/gcc/hooks.h
> +++ b/gcc/hooks.h
> @@ -40,7 +40,9 @@ extern bool hook_bool_const_rtx_insn_const_rtx_insn_true (const rtx_insn *,
>  extern bool hook_bool_mode_uhwi_false (machine_mode,
>                                         unsigned HOST_WIDE_INT);
>  extern bool hook_bool_puint64_puint64_true (poly_uint64, poly_uint64);
> -extern bool hook_bool_uint_mode_false (unsigned int, machine_mode);
> +extern bool hook_bool_insn_uint_mode_false (rtx_insn *,
> +                                            unsigned int,
> +                                            machine_mode);

No need to break the line after "rtx_insn *,".

>  extern bool hook_bool_uint_mode_true (unsigned int, machine_mode);
>  extern bool hook_bool_tree_false (tree);
>  extern bool hook_bool_const_tree_false (const_tree);
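To be concrete, the aarch64.c return statement and the hooks.h
declaration could then end up looking something like this (just a
sketch, the exact wrapping is up to you):

  /* aarch64.c: add the space and wrap to stay under 80 columns.  */
  bool simd_p = insn && CALL_P (insn) && aarch64_simd_call_p (insn);
  return (FP_REGNUM_P (regno)
          && maybe_gt (GET_MODE_SIZE (mode), simd_p ? 16 : 8));

  /* hooks.h: one break is enough.  */
  extern bool hook_bool_insn_uint_mode_false (rtx_insn *, unsigned int,
                                              machine_mode);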
> diff --git a/gcc/ira-conflicts.c b/gcc/ira-conflicts.c
> index b57468b..b697e57 100644
> --- a/gcc/ira-conflicts.c
> +++ b/gcc/ira-conflicts.c
> @@ -808,7 +808,8 @@ ira_build_conflicts (void)
>          regs must conflict with them.  */
>       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
>         if (!TEST_HARD_REG_BIT (call_used_reg_set, regno)
> -           && targetm.hard_regno_call_part_clobbered (regno,
> +           && targetm.hard_regno_call_part_clobbered (NULL,
> +                                                      regno,
>                                                        obj_mode))
>           {
>             SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj), regno);

No need to break the line after "NULL,".

> diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c
> index e5d8804..7f60712 100644
> --- a/gcc/ira-costs.c
> +++ b/gcc/ira-costs.c
> @@ -2379,7 +2379,8 @@ ira_tune_allocno_costs (void)
>                                              *crossed_calls_clobber_regs)
>                 && (ira_hard_reg_set_intersection_p (regno, mode,
>                                                      call_used_reg_set)
> -                   || targetm.hard_regno_call_part_clobbered (regno,
> +                   || targetm.hard_regno_call_part_clobbered (NULL,
> +                                                              regno,
>                                                                mode)))
>               cost += (ALLOCNO_CALL_FREQ (a)
>                        * (ira_memory_move_cost[mode][rclass][0]

Same here.

> diff --git a/gcc/lra-int.h b/gcc/lra-int.h
> index 9d9e81d..ccc7b00 100644
> --- a/gcc/lra-int.h
> +++ b/gcc/lra-int.h
> @@ -117,6 +117,8 @@ struct lra_reg
>    /* This member is set up in lra-lives.c for subsequent
>       assignments.  */
>    lra_copy_t copies;
> +  /* Call instruction that may affect this register.  */
> +  rtx_insn *call_insn;
>  };
>
>  /* References to the common info about each register.  */

If we do this right, I think the new field should be able to replace
call_p.  The pseudo crosses a call iff call_insn is nonnull.

I think the field belongs after:

  poly_int64 offset;

since it comes under:

  /* The following fields are defined only for pseudos.  */

rather than:

  /* These members are set up in lra-lives.c and updated in
     lra-coalesce.c.  */

> diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c
> index 7b60691..0b96891 100644
> --- a/gcc/lra-lives.c
> +++ b/gcc/lra-lives.c
> @@ -579,18 +579,26 @@ lra_setup_reload_pseudo_preferenced_hard_reg (int regno,
>     PSEUDOS_LIVE_THROUGH_CALLS and PSEUDOS_LIVE_THROUGH_SETJUMPS.  */
>  static inline void
>  check_pseudos_live_through_calls (int regno,
> -                                  HARD_REG_SET last_call_used_reg_set)
> +                                  HARD_REG_SET last_call_used_reg_set,
> +                                  rtx_insn *call_insn)
>  {
>    int hr;
>
> +  if (call_insn && CALL_P (call_insn) && targetm.return_call_with_max_clobbers)
> +    lra_reg_info[regno].call_insn =
> +       targetm.return_call_with_max_clobbers (call_insn,
> +                                              lra_reg_info[regno].call_insn);
> +

This should happen...

>    if (! sparseset_bit_p (pseudos_live_through_calls, regno))
>      return;

...here, where we know that regno is live across a call like that
described by call_insn.  call_insn should be nonnull and a CALL_P
in that case.  We should assert for that regardless of whether
targetm.return_call_with_max_clobbers is nonnull.
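For instance, something like the following right after the early
return (gcc_checking_assert here is only a suggestion; a plain
gcc_assert would be fine too):

  gcc_checking_assert (call_insn && CALL_P (call_insn));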
The update should be something like:

  rtx_insn *old_call_insn = lra_reg_info[regno].call_insn;
  if (!old_call_insn
      || (targetm.return_call_with_max_clobbers
          && targetm.return_call_with_max_clobbers (old_call_insn,
                                                    call_insn) == call_insn))
    lra_reg_info[regno].call_insn = call_insn;

>    sparseset_clear_bit (pseudos_live_through_calls, regno);
>    IOR_HARD_REG_SET (lra_reg_info[regno].conflict_hard_regs,
>                      last_call_used_reg_set);
>
>    for (hr = 0; HARD_REGISTER_NUM_P (hr); hr++)
> -    if (targetm.hard_regno_call_part_clobbered (hr,
> +    if (targetm.hard_regno_call_part_clobbered (call_insn,
> +                                                hr,
>                                                  PSEUDO_REGNO_MODE (regno)))
>        add_to_hard_reg_set (&lra_reg_info[regno].conflict_hard_regs,
>                             PSEUDO_REGNO_MODE (regno), hr);

No need to break the line after "call_insn,".

> @@ -658,6 +667,17 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
>    if (lra_dump_file != NULL)
>      fprintf (lra_dump_file, "  BB %d\n", bb->index);
>
> +  call_insn = NULL;
> +  if (targetm.return_call_with_max_clobbers)
> +    {
> +      FOR_BB_INSNS_REVERSE_SAFE (bb, curr_insn, next)
> +        {
> +          if (CALL_P (curr_insn))
> +            call_insn = targetm.return_call_with_max_clobbers (curr_insn,
> +                                                               call_insn);
> +        }
> +    }
> +
>    /* Scan the code of this basic block, noting which pseudos and hard
>       regs are born or die.

What I meant about keeping track of the current call during the main
block walk is that we shouldn't have this loop.  Instead we should
update call_insn when we see a CALL_P...

> @@ -847,7 +867,8 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
>               update_pseudo_point (reg->regno, curr_point, USE_POINT);
>               mark_regno_live (reg->regno, reg->biggest_mode);
>               check_pseudos_live_through_calls (reg->regno,
> -                                               last_call_used_reg_set);
> +                                               last_call_used_reg_set,
> +                                               call_insn);
>             }
>
>           if (!HARD_REGISTER_NUM_P (reg->regno))

> @@ -896,7 +917,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
>
>           if (call_p)
>             {
> -             if (! flag_ipa_ra)
> +             if (! flag_ipa_ra && ! targetm.return_call_with_max_clobbers)
>                 COPY_HARD_REG_SET(last_call_used_reg_set, call_used_reg_set);
>               else
>                 {

...here.  In the "if" arm we can set call_insn to the current
instruction, because any call will do.  In the "else" arm we should
extend this:

                  bool flush = (! hard_reg_set_empty_p (last_call_used_reg_set)
                                && ! hard_reg_set_equal_p (last_call_used_reg_set,
                                                           this_call_used_reg_set))

so that we also flush when the old call insn part-clobbers a different
set of registers.

The idea is that we're trying to do something similar to -fipa-ra:
the registers clobbered by a call depend on the call_insn.  -fipa-ra
does that by dividing the block into regions with the same call
behaviour.  Then:

- pseudos_live_through_calls says which registers are live across
  a call in the current region

- last_call_used_reg_set describes the set of registers that are
  clobbered by calls in the current region

A new region starts whenever we find a call instruction that clobbers
a different set of registers from those in last_call_used_reg_set
(whether that's more registers or fewer).

We want to divide the block into regions in a similar way for
return_call_with_max_clobbers.  A new region starts whenever the new
call preserves a different set of registers from the original call.
This can be tested by something like:

/* Return true if call instructions CALL1 and CALL2 use ABIs that
   preserve the same set of registers.  */

static bool
calls_have_same_clobbers_p (rtx_insn *call1, rtx_insn *call2)
{
  if (!targetm.return_call_with_max_clobbers)
    return false;

  return (targetm.return_call_with_max_clobbers (call1, call2) == call1
          && targetm.return_call_with_max_clobbers (call2, call1) == call2);
}

I think it would make sense for return_call_with_max_clobbers to only
handle nonnull insns (I probably said otherwise earlier, sorry).

> diff --git a/gcc/reload.c b/gcc/reload.c
> index 6cfd5e2..bff84da 100644
> --- a/gcc/reload.c
> +++ b/gcc/reload.c
> @@ -6912,13 +6912,16 @@ find_equiv_reg (rtx goal, rtx_insn *insn, enum reg_class rclass, int other,
>        if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER)
>         for (i = 0; i < nregs; ++i)
>           if (call_used_regs[regno + i]
> -             || targetm.hard_regno_call_part_clobbered (regno + i, mode))
> +             || targetm.hard_regno_call_part_clobbered (NULL,
> +                                                        regno + i,
> +                                                        mode))
>             return 0;
>
>        if (valueno >= 0 && valueno < FIRST_PSEUDO_REGISTER)
>         for (i = 0; i < valuenregs; ++i)
>           if (call_used_regs[valueno + i]
> -             || targetm.hard_regno_call_part_clobbered (valueno + i,
> +             || targetm.hard_regno_call_part_clobbered (NULL,
> +                                                        valueno + i,
>                                                          mode))
>             return 0;
>      }

No need to split the lines after "NULL,".

> diff --git a/gcc/target.def b/gcc/target.def
> index e8f0f70..839d0b9 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -5766,14 +5766,29 @@ DEFHOOK
>  (hard_regno_call_part_clobbered,
>   "This hook should return true if @var{regno} is partly call-saved and\n\
>  partly call-clobbered, and if a value of mode @var{mode} would be partly\n\
> -clobbered by a call.  For example, if the low 32 bits of @var{regno} are\n\
> -preserved across a call but higher bits are clobbered, this hook should\n\
> -return true for a 64-bit mode but false for a 32-bit mode.\n\
> +clobbered by the @var{insn} call.  If @var{insn} is NULL then it should\n\

Maybe "by call instruction @var{insn}".

Thanks,
Richard