public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Sandiford <richard.sandiford@arm.com>
To: Tamar Christina <Tamar.Christina@arm.com>
Cc: "gcc-patches\@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	 nd <nd@arm.com>,  Richard Earnshaw <Richard.Earnshaw@arm.com>,
	 Marcus Shawcroft <Marcus.Shawcroft@arm.com>,
	 "ktkachov\@gcc.gnu.org" <ktkachov@gcc.gnu.org>
Subject: Re: [PATCH 3/4]AArch64: add new alternative with early clobber to patterns
Date: Thu, 30 May 2024 21:12:24 +0100	[thread overview]
Message-ID: <mpt8qzr9hzr.fsf@arm.com> (raw)
In-Reply-To: <VI1PR08MB53257FD008C912EE0C6C8790FFF12@VI1PR08MB5325.eurprd08.prod.outlook.com> (Tamar Christina's message of "Tue, 28 May 2024 09:38:42 +0000")

Tamar Christina <Tamar.Christina@arm.com> writes:
> [...]
> @@ -6651,8 +6661,10 @@ (define_insn "and<mode>3"
>  	(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
>  		      (match_operand:PRED_ALL 2 "register_operand")))]
>    "TARGET_SVE"
> -  {@ [ cons: =0, 1  , 2   ]
> -     [ Upa     , Upa, Upa ] and\t%0.b, %1/z, %2.b, %2.b
> +  {@ [ cons: =0, 1  , 2  ; attrs: pred_clobber ]
> +     [ &Upa    , Upa, Upa; yes                 ] and\t%0.b, %1/z, %2.b, %2.b
> +     [ ?Upa    , 0  , Upa; yes                 ] ^
> +     [ Upa     , Upa, Upa; no                  ] ^

I think this ought to be:

> +  {@ [ cons: =0, 1  ,  2   ; attrs: pred_clobber ]
> +     [ &Upa    , Upa,  Upa ; yes                 ] and\t%0.b, %1/z, %2.b, %2.b
> +     [ ?Upa    , 0Upa, 0Upa; yes                 ] ^
> +     [ Upa     , Upa,  Upa ; no                  ] ^

so that operand 2 can be tied to operand 0 in the worst case.  Similarly:

>    }
>  )
>  
> @@ -6679,8 +6691,10 @@ (define_insn "@aarch64_pred_<optab><mode>_z"
>  	    (match_operand:PRED_ALL 3 "register_operand"))
>  	  (match_operand:PRED_ALL 1 "register_operand")))]
>    "TARGET_SVE"
> -  {@ [ cons: =0, 1  , 2  , 3   ]
> -     [ Upa     , Upa, Upa, Upa ] <logical>\t%0.b, %1/z, %2.b, %3.b
> +  {@ [ cons: =0, 1  , 2  , 3  ; attrs: pred_clobber ]
> +     [ &Upa    , Upa, Upa, Upa; yes                 ] <logical>\t%0.b, %1/z, %2.b, %3.b
> +     [ ?Upa    , 0  , Upa, Upa; yes                 ] ^
> +     [ Upa     , Upa, Upa, Upa; no                  ] ^
>    }
>  )

this would be:

  {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
     [ &Upa    , Upa , Upa , Upa ; yes                 ] <logical>\t%0.b, %1/z, %2.b, %3.b
     [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
     [ Upa     , Upa , Upa , Upa ; no                  ] ^
  }

Same idea for the rest.

I tried this on:

----------------------------------------------------------------------
#include <arm_sve.h>

void use (svbool_t, svbool_t, svbool_t);

/* Loop-carried case: each svand_z result is assigned back to p2, which
   is also the third argument of the next call, so the destination and
   one of the inputs are the same variable on every iteration.  The
   final value of p2 is stored through PTR after N iterations.  */
void
f1 (svbool_t p0, svbool_t p1, svbool_t p2, int n, svbool_t *ptr)
{
  while (n--)
    p2 = svand_z (p0, p1, p2);
  *ptr = p2;
}

/* Store-only case: the svand_z result is written through PTR and none
   of P0, P1 or P2 is used again after the call.  */
void
f2 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t *ptr)
{
  *ptr = svand_z (p0, p1, p2);
}

/* The svand_z result is passed as the FIRST argument of use (), with
   P1 and P2 passed through unchanged as the second and third.  PTR is
   unused.  */
void
f3 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t *ptr)
{
  use (svand_z (p0, p1, p2), p1, p2);
}

/* The svand_z result is passed as the SECOND argument of use (), with
   P0 and P2 passed through unchanged as the first and third.  PTR is
   unused.  */
void
f4 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t *ptr)
{
  use (p0, svand_z (p0, p1, p2), p2);
}

/* The svand_z result is passed as the THIRD argument of use (), with
   P0 and P1 passed through unchanged as the first and second.  PTR is
   unused.  */
void
f5 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t *ptr)
{
  use (p0, p1, svand_z (p0, p1, p2));
}
----------------------------------------------------------------------

and it seemed to produce the right output:

----------------------------------------------------------------------
f1:
        cbz     w0, .L2
        sub     w0, w0, #1
        .p2align 5,,15
.L3:
        and     p2.b, p0/z, p1.b, p2.b
        sub     w0, w0, #1
        cmn     w0, #1
        bne     .L3
.L2:
        str     p2, [x1]
        ret

f2:
        and     p3.b, p0/z, p1.b, p2.b
        str     p3, [x0]
        ret

f3:
        and     p0.b, p0/z, p1.b, p2.b
        b       use

f4:
        and     p1.b, p0/z, p1.b, p2.b
        b       use

f5:
        and     p2.b, p0/z, p1.b, p2.b
        b       use
----------------------------------------------------------------------

(with that coming directly from register allocation, rather than being
cleaned up later)

> [...]
> @@ -10046,8 +10104,10 @@ (define_insn_and_rewrite "*aarch64_brkn_cc"
>  	   (match_dup 3)]
>  	  UNSPEC_BRKN))]
>    "TARGET_SVE"
> -  {@ [ cons: =0, 1  , 2  , 3 ]
> -     [ Upa     , Upa, Upa, 0 ] brkns\t%0.b, %1/z, %2.b, %0.b
> +  {@ [ cons: =0, 1  , 2  , 3; attrs: pred_clobber ]
> +     [ &Upa    , Upa, Upa, 0; yes                 ] brkns\t%0.b, %1/z, %2.b, %0.b
> +     [ ?Upa    , 0  , Upa, 0; yes                 ] ^
> +     [ Upa     , Upa, Upa, 0; no                  ] ^
>    }
>    "&& (operands[4] != CONST0_RTX (VNx16BImode)
>         || operands[5] != CONST0_RTX (VNx16BImode))"

Probably best to leave this out.  All alternatives require operand 3
to match operand 0.  So operands 1 and 2 will only match operand 0
if they're the same as operand 3.  In that case it'd be better to
allow the sharing rather than force the same value to be stored
in two registers.

That is, if op1 != op3 && op2 != op3 then we get what we want
naturally, regardless of tuning.

The same thing would apply to the BRKN instances of <brk_reg_con>:

> @@ -10020,8 +10076,10 @@ (define_insn "@aarch64_brk<brk_op>"
>  	   (match_operand:VNx16BI 3 "register_operand")]
>  	  SVE_BRK_BINARY))]
>    "TARGET_SVE"
> -  {@ [ cons: =0, 1  , 2  , 3             ]
> -     [ Upa     , Upa, Upa, <brk_reg_con> ] brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b
> +  {@ [ cons: =0,  1 , 2  , 3            ; attrs: pred_clobber ]
> +     [ &Upa    , Upa, Upa, <brk_reg_con>; yes                 ] brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b
> +     [ ?Upa    , 0  , Upa, <brk_reg_con>; yes                 ] ^
> +     [ Upa     , Upa, Upa, <brk_reg_con>; no                  ] ^
>    }
>  )

but I think we should keep this factoring/abstraction and just add
the extra alternatives regardless.  I.e.:

  {@ [ cons: =0, 1   , 2   , 3             ; attrs: pred_clobber ]
     [ &Upa    , Upa , Upa , <brk_reg_con> ; yes                 ] brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b
     [ ?Upa    , 0Upa, 0Upa, 0<brk_reg_con>; yes                 ] ^
     [ Upa     , Upa , Upa , <brk_reg_con> ; no                  ] ^

(even though this gives "00", which is valid but redundant).

OK with those changes, thanks.

Richard

  reply	other threads:[~2024-05-30 20:12 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-22  9:29 Tamar Christina
2024-05-22  9:47 ` Richard Sandiford
2024-05-22 11:00   ` Tamar Christina
2024-05-22 11:24     ` Richard Sandiford
2024-05-28  9:38       ` Tamar Christina
2024-05-30 20:12         ` Richard Sandiford [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-05-15 10:28 [PATCH 0/4]AArch64: support conditional early clobbers on certain operations Tamar Christina
2024-05-15 10:29 ` [PATCH 3/4]AArch64: add new alternative with early clobber to patterns Tamar Christina

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=mpt8qzr9hzr.fsf@arm.com \
    --to=richard.sandiford@arm.com \
    --cc=Marcus.Shawcroft@arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=Tamar.Christina@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=ktkachov@gcc.gnu.org \
    --cc=nd@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).