From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <richard.sandiford@arm.com>
Received: from foss.arm.com (foss.arm.com [217.140.110.172])
 by sourceware.org (Postfix) with ESMTP id 31FA03858D28
 for <gcc-patches@gcc.gnu.org>; Wed, 17 Nov 2021 15:38:46 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 31FA03858D28
Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14])
 by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id C4A1E1FB;
 Wed, 17 Nov 2021 07:38:45 -0800 (PST)
Received: from localhost (e121540-lin.manchester.arm.com [10.32.98.88])
 by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 32C213F5A1;
 Wed, 17 Nov 2021 07:38:45 -0800 (PST)
From: Richard Sandiford <richard.sandiford@arm.com>
To: "Andre Vieira \(lists\)" <andre.simoesdiasvieira@arm.com>
Mail-Followup-To: "Andre Vieira \(lists\)" <andre.simoesdiasvieira@arm.com>,
 Richard Biener <rguenther@suse.de>,
 "gcc-patches\@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
 richard.sandiford@arm.com
Cc: Richard Biener <rguenther@suse.de>,
 "gcc-patches\@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: Re: [AArch64] Enable generation of FRINTNZ instructions
References: <8225375c-eb9e-f9b3-6bcd-9fbccf2fc87b@arm.com>
 <70s9nn94-452-5rrr-4458-q6n3qp563652@fhfr.qr>
 <36e3469a-3922-d49e-4006-0088eac29157@arm.com>
 <653o8886-3p5n-sr82-9n83-71q33o8824@fhfr.qr>
 <6c730f35-10b1-2843-cffc-4ed0851380be@arm.com>
Date: Wed, 17 Nov 2021 15:38:43 +0000
In-Reply-To: <6c730f35-10b1-2843-cffc-4ed0851380be@arm.com> (Andre Vieira's
 message of "Wed, 17 Nov 2021 13:30:31 +0000")
Message-ID: <mptfsru7px8.fsf@arm.com>
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
X-Spam-Status: No, score=-12.3 required=5.0 tests=BAYES_00, GIT_PATCH_0,
 KAM_DMARC_STATUS, KAM_LOTSOFHASH, KAM_SHORT, SPF_HELO_NONE, SPF_PASS,
 TXREP autolearn=ham autolearn_force=no version=3.4.4
X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on
 server2.sourceware.org
X-BeenThere: gcc-patches@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-patches mailing list <gcc-patches.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Wed, 17 Nov 2021 15:38:48 -0000

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 4035e061706793849c68ae09bcb2e4b9580ab7b6..62adbc4cb6bbbe0c856f9fbe4=
51aee08f2dea3b5 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -7345,6 +7345,14 @@ (define_insn "despeculate_simpleti"
>     (set_attr "speculation_barrier" "true")]
>  )
>=20=20
> +(define_expand "ftrunc<mode><frintnz_mode>2"
> +  [(set (match_operand:VSFDF 0 "register_operand" "=3Dw")
> +        (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
> +		      FRINTNZ))]
> +  "TARGET_FRINT && TARGET_FLOAT
> +   && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)"
> +)

Probably just me, but this condition seems quite hard to read.
I think it'd be better to add conditions to the VSFDF definition instead,
a bit like we do for the HF entries in VHSDF_HSDF and VHSDF_DF.  I.e.:

(define_mode_iterator VSFDF [(V2SF "TARGET_SIMD")
			     (V4SF "TARGET_SIMD")
			     (V2DF "TARGET_SIMD")
			     (SF "TARGET_FLOAT")
			     (DF "TARGET_FLOAT")])

Then the condition can be "TARGET_FRINT".

Same for the existing aarch64_<frintnzs_op><mode>.

> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index bb13c6cce1bf55633760bc14980402f1f0ac1689..fb97d37cecae17cdb6444e7f3=
391361b214f0712 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -269,6 +269,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, un=
ary)
>  DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
>  DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
>  DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
> +DEF_INTERNAL_OPTAB_FN (FTRUNC_INT, ECF_CONST, ftruncint, ftrunc_int)

ftrunc_int should be described in the comment at the top of the file.
E.g.:

  - ftrunc_int: a unary conversion optab that takes and returns values
    of the same mode, but internally converts via another mode.  This
    second mode is specified using a dummy final function argument.

> diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz.c b/gcc/testsuite/g=
cc.target/aarch64/frintnz.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..2e1971f8aa11d8b95f454d03a=
03e050a3bf96747
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/frintnz.c
> @@ -0,0 +1,88 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=3Darmv8.5-a" } */
> +/* { dg-require-effective-target arm_v8_5a_frintnzx_ok } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +/*
> +** f1:
> +**	...
> +**	frint32z	s0, s0
> +**	...

Are these functions ever more than just:

f1:
	frint32z	s0, s0
	ret

?  If not, I think we should match that sequence and =E2=80=9Cdefend=E2=80=
=9D the
good codegen.  The problem with ... on both sides is that it's
then not clear why we can rely on register 0 being used.

> +*/
> +float
> +f1 (float x)
> +{
> +  int y =3D x;
> +  return (float) y;
> +}
> +
> +/*
> +** f2:
> +**	...
> +**	frint64z	s0, s0
> +**	...
> +*/
> +float
> +f2 (float x)
> +{
> +  long long int y =3D x;
> +  return (float) y;
> +}
> +
> +/*
> +** f3:
> +**	...
> +**	frint32z	d0, d0
> +**	...
> +*/
> +double
> +f3 (double x)
> +{
> +  int y =3D x;
> +  return (double) y;
> +}
> +
> +/*
> +** f4:
> +**	...
> +**	frint64z	d0, d0
> +**	...
> +*/
> +double
> +f4 (double x)
> +{
> +  long long int y =3D x;
> +  return (double) y;
> +}
> +
> +float
> +f1_dont (float x)
> +{
> +  unsigned int y =3D x;
> +  return (float) y;
> +}
> +
> +float
> +f2_dont (float x)
> +{
> +  unsigned long long int y =3D x;
> +  return (float) y;
> +}
> +
> +double
> +f3_dont (double x)
> +{
> +  unsigned int y =3D x;
> +  return (double) y;
> +}
> +
> +double
> +f4_dont (double x)
> +{
> +  unsigned long long int y =3D x;
> +  return (double) y;
> +}
> +
> +/* Make sure the 'dont's don't generate any frintNz.  */
> +/* { dg-final { scan-assembler-times {frint32z} 2 } } */
> +/* { dg-final { scan-assembler-times {frint64z} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c b/gcc/testsu=
ite/gcc.target/aarch64/merge_trunc1.c
> index 07217064e2ba54fcf4f5edc440e6ec19ddae66e1..3b34dc3ad79f1406a41ec4c00=
db10347ba1ca2c4 100644
> --- a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -ffast-math" } */
> +/* { dg-skip-if "" { arm_v8_5a_frintnzx_ok } } */
>=20=20
>  float
>  f1 (float x)
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/ta=
rget-supports.exp
> index 8cbda192fe0fae59ea208ee43696b4d22c43e61e..7fa1659ce734257f3cd96f1e2=
e50ace4d02dcf51 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -11365,6 +11365,33 @@ proc check_effective_target_arm_v8_3a_bkey_direc=
tive { } {
>  	}]
>  }
>=20=20
> +# Return 1 if the target supports ARMv8.5 scalar and Adv.Simd FRINT32[ZX]

Armv8.5-A

> +# and FRINT64[ZX] instructions, 0 otherwise. The test is valid for AArch=
64.
> +# Record the command line options needed.
> +
> +proc check_effective_target_arm_v8_5a_frintnzx_ok_nocache { } {
> +
> +    if { ![istarget aarch64*-*-*] } {
> +        return 0;
> +    }
> +
> +    if { [check_no_compiler_messages_nocache \
> +	      arm_v8_5a_frintnzx_ok assembly {
> +	#if !defined (__ARM_FEATURE_FRINT)
> +	#error "__ARM_FEATURE_FRINT not defined"
> +	#endif
> +    } [current_compiler_flags]] } {
> +	return 1;
> +    }
> +
> +    return 0;
> +}
> +
> +proc check_effective_target_arm_v8_5a_frintnzx_ok { } {

The new condition should be documented in sourcebuild.texi, near
the existing arm_v8_* tests.

OK for the non-match.pd parts with those changes.  I don't feel
qualified to review the match.pd bits. :-)

Thanks,
Richard

> +    return [check_cached_effective_target arm_v8_5a_frintnzx_ok \
> +                check_effective_target_arm_v8_5a_frintnzx_ok_nocache]=20
> +}
> +
>  # Return 1 if the target supports executing the Armv8.1-M Mainline Low
>  # Overhead Loop, 0 otherwise.  The test is valid for ARM.
>=20=20