From: James Greenhalgh <james.greenhalgh@arm.com>
To: Tamar Christina <Tamar.Christina@arm.com>
Cc: Richard Sandiford <richard.sandiford@linaro.org>,
GCC Patches <gcc-patches@gcc.gnu.org>, nd <nd@arm.com>,
Marcus Shawcroft <Marcus.Shawcroft@arm.com>,
Richard Earnshaw <Richard.Earnshaw@arm.com>
Subject: Re: [PATCH][GCC][AArch64] optimize float immediate moves (1 /4) - infrastructure.
Date: Wed, 19 Jul 2017 15:49:00 -0000 [thread overview]
Message-ID: <20170719154731.GA35511@arm.com> (raw)
In-Reply-To: <VI1PR0801MB203183E948429F85BFB14E3CFFDF0@VI1PR0801MB2031.eurprd08.prod.outlook.com>
On Mon, Jun 26, 2017 at 11:49:42AM +0100, Tamar Christina wrote:
> Hi All,
>
> I've updated patch accordingly.
>
> This mostly involves removing the loop to create the ival
> and removing the *2 code and instead defaulting to 64bit
> and switching to 128 when needed.
>
> Regression tested on aarch64-none-linux-gnu and no regressions.
>
> OK for trunk?
Almost, with a couple of small changes and clarifications.
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 04417dcd609f6e8ff594a9c5853b3143696d3208..efb027f7fa9b9750b019c529bbcfc8b73dbaf804 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4668,6 +4670,62 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
> return true;
> }
>
> +/* Return the binary representation of floating point constant VALUE in INTVAL.
> + If the value cannot be converted, return false without setting INTVAL.
> + The conversion is done in the given MODE. */
> +bool
> +aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
> +{
> + machine_mode mode = GET_MODE (value);
> + if (GET_CODE (value) != CONST_DOUBLE
> + || !SCALAR_FLOAT_MODE_P (mode)
> + || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
> + return false;
> +
> + unsigned HOST_WIDE_INT ival = 0;
> +
> + /* Only support up to DF mode. */
> + gcc_assert (GET_MODE_BITSIZE (mode) <= 64);
GET_MODE_BITSIZE (DFmode) would be more self-documenting.
> +
> + long res[2];
> + real_to_target (res,
> + CONST_DOUBLE_REAL_VALUE (value),
> + REAL_MODE_FORMAT (mode));
> +
> + ival = zext_hwi (res[0], 32);
> + if (GET_MODE_BITSIZE (mode) == 64)
Likewise here.
> + ival |= (zext_hwi (res[1], 32) << 32);
> +
> + *intval = ival;
> + return true;
> +}
> +
> @@ -4680,6 +4738,46 @@ aarch64_float_const_zero_rtx_p (rtx x)
> return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
> }
>
> +/* Return TRUE if rtx X is immediate constant that fits in a single
> + MOVI immediate operation. */
> +bool
> +aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
> +{
> + if (!TARGET_SIMD)
> + return false;
> +
> + machine_mode vmode, imode;
> + unsigned HOST_WIDE_INT ival;
> +
> + /* Don't write float constants out to memory. */
This comment seems (only a little) out of line with the code below. Perhaps:
"Don't write float constants out to memory if we can represent them as
integers."
> + if (GET_CODE (x) == CONST_DOUBLE
> + && SCALAR_FLOAT_MODE_P (mode))
> + {
> + if (!aarch64_reinterpret_float_as_int (x, &ival))
> + return false;
> +
> + imode = int_mode_for_mode (mode);
> + }
> + else if (GET_CODE (x) == CONST_INT
> + && SCALAR_INT_MODE_P (mode))
> + {
> + imode = mode;
> + ival = INTVAL (x);
> + }
> + else
> + return false;
> +
> + /* use a 64 bit mode for everything except for DI/DF mode, where we use
> + a 128 bit vector mode. */
> + int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
> +
> + vmode = aarch64_simd_container_mode (imode, width);
> + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
> +
> + return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
I still wonder whether we could rewrite aarch64_simd_valid_immediate to
avoid the need for a 128-bit vector here - 64-bits are good enough.
This doesn't need to be fixed for this patch, but it would make for a small
optimisation.
> +}
> +
> +
> /* Return the fixed registers used for condition codes. */
>
> static bool
> @@ -5857,12 +5955,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
> return NO_REGS;
> }
>
> - /* If it's an integer immediate that MOVI can't handle, then
> - FP_REGS is not an option, so we return NO_REGS instead. */
> - if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
> - && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
> - return NO_REGS;
> -
I don't understand this relaxation. Could you explain what this achieves
and why it is safe in this patch?
> /* Register eliminiation can result in a request for
> SP+constant->FP_REGS. We cannot support such operations which
> use SP as source and an FP_REG as destination, so reject out
> @@ -6773,26 +6865,44 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
> return true;
>
> case CONST_DOUBLE:
> +
> + /* First determine number of instructions to do the move
> + as an integer constant. */
> + if (!aarch64_float_const_representable_p (x)
> + && !aarch64_can_const_movi_rtx_p (x, mode)
> + && aarch64_float_const_rtx_p (x))
> + {
> + unsigned HOST_WIDE_INT ival;
> + bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
> + gcc_assert (succeed);
> +
> + machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode);
> + int ncost = aarch64_internal_mov_immediate
> + (NULL_RTX, gen_int_mode (ival, imode), false, imode);
> + *cost += COSTS_N_INSNS (ncost);
> + return true;
> + }
> +
> if (speed)
> {
> - /* mov[df,sf]_aarch64. */
> - if (aarch64_float_const_representable_p (x))
> - /* FMOV (scalar immediate). */
> - *cost += extra_cost->fp[mode == DFmode].fpconst;
> - else if (!aarch64_float_const_zero_rtx_p (x))
> - {
> - /* This will be a load from memory. */
> - if (mode == DFmode)
> + /* mov[df,sf]_aarch64. */
> + if (aarch64_float_const_representable_p (x))
> + /* FMOV (scalar immediate). */
> + *cost += extra_cost->fp[mode == DFmode].fpconst;
> + else if (!aarch64_float_const_zero_rtx_p (x))
> + {
> + /* This will be a load from memory. */
> + if (mode == DFmode)
> *cost += extra_cost->ldst.loadd;
> - else
> + else
> *cost += extra_cost->ldst.loadf;
> - }
> - else
> - /* Otherwise this is +0.0. We get this using MOVI d0, #0
> - or MOV v0.s[0], wzr - neither of which are modeled by the
> - cost tables. Just use the default cost. */
> - {
> - }
> + }
> + else
> + /* Otherwise this is +0.0. We get this using MOVI d0, #0
> + or MOV v0.s[0], wzr - neither of which are modeled by the
> + cost tables. Just use the default cost. */
> + {
> + }
Why reindent this code? From what I can tell this makes the code less
conformant to GNU style.
> }
>
> return true;
> @@ -6974,7 +7084,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
> if (speed)
> *cost += extra_cost->fp[mode == DFmode].compare;
>
> - if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
> + if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
Likewise here?
> {
> *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
> /* FCMP supports constant 0.0 for no extra cost. */
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index cd7ded986630c14ed6d42618b2a1f9baa0cbd192..6992c82fa790eac34669fcc5b030e395ad332201 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -53,6 +53,11 @@
> (ior (match_operand 0 "register_operand")
> (match_test "op == const0_rtx"))))
>
> +(define_predicate "aarch64_reg_or_fp_float"
> + (ior (match_operand 0 "register_operand")
> + (and (match_code "const_double")
> + (match_test "aarch64_float_const_rtx_p (op)"))))
> +
Unused?
> (define_predicate "aarch64_reg_or_fp_zero"
> (ior (match_operand 0 "register_operand")
> (and (match_code "const_double")
Thanks,
James
next prev parent reply other threads:[~2017-07-19 15:49 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-06-07 11:38 Tamar Christina
2017-06-08 10:32 ` Richard Sandiford
2017-06-12 7:29 ` Tamar Christina
2017-06-13 16:39 ` James Greenhalgh
2017-06-13 17:08 ` Richard Sandiford
2017-06-15 12:50 ` Tamar Christina
2017-06-26 10:49 ` Tamar Christina
2017-07-03 6:12 ` Tamar Christina
2017-07-10 7:35 ` Tamar Christina
2017-07-19 15:49 ` James Greenhalgh [this message]
2017-07-26 16:01 ` Tamar Christina
2017-07-27 15:43 ` James Greenhalgh
2017-07-28 15:06 ` Tamar Christina
2017-08-01 11:16 ` Bin.Cheng
2017-08-01 11:19 ` Tamar Christina
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170719154731.GA35511@arm.com \
--to=james.greenhalgh@arm.com \
--cc=Marcus.Shawcroft@arm.com \
--cc=Richard.Earnshaw@arm.com \
--cc=Tamar.Christina@arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=nd@arm.com \
--cc=richard.sandiford@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).