Re: [PATCH][AArch64] ACLE intrinsics: convert from BFloat16 to Float32

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Dennis Zhang <dennis.zhang@arm.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	nd <nd@arm.com>, Richard Earnshaw <Richard.Earnshaw@arm.com>,
	Marcus Shawcroft <Marcus.Shawcroft@arm.com>,
	Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>,
	richard.sandiford@arm.com
Subject: Re: [PATCH][AArch64] ACLE intrinsics: convert from BFloat16 to Float32
Date: Mon, 2 Nov 2020 17:27:29 +0000	[thread overview]
Message-ID: <4245739e-ef4f-531c-6f50-278e4ec441fe@arm.com> (raw)
In-Reply-To: <mptmu049b4x.fsf@arm.com>

[-- Attachment #1: Type: text/plain, Size: 1447 bytes --]

Hi Richard,

On 10/29/20 5:48 PM, Richard Sandiford wrote:
> Dennis Zhang <Dennis.Zhang@arm.com> writes:
>> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
>> index 5bc596dbffc..b68c3ca7f4b 100644
>> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
>> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
>> @@ -732,3 +732,8 @@
>>     VAR1 (UNOP, bfcvtn_q, 0, ALL, v8bf)
>>     VAR1 (BINOP, bfcvtn2, 0, ALL, v8bf)
>>     VAR1 (UNOP, bfcvt, 0, ALL, bf)
>> +
>> +  /* Implemented by aarch64_{v}bfcvt{_high}<mode>.  */
>> +  VAR2 (UNOP, vbfcvt, 0, ALL, v4bf, v8bf)
>> +  VAR1 (UNOP, vbfcvt_high, 0, ALL, v8bf)
>> +  VAR1 (UNOP, bfcvt, 0, ALL, sf)
> 
> New intrinsics should use something more specific than “ALL”.
> Since these functions are pure non-trapping integer operations,
> I think they should use “AUTO_FP” instead.  (On reflection,
> we should probably change the name.)
> 
>> +(define_insn "aarch64_bfcvtsf"
>> +  [(set (match_operand:SF 0 "register_operand" "=w")
>> +	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
>> +		    UNSPEC_BFCVT))]
>> +  "TARGET_BF16_FP"
>> +  "shl\\t%d0, %d1, #16"
>> +  [(set_attr "type" "neon_shift_reg")]
> 
> I think this should be neon_shift_imm instead.
> 
> OK with those changes, thanks.
> 
> Richard
> 

I've fixed the Flag and the insn attribute.
I will commit it if no further issues.
Thanks for the review.

Regards
Dennis

[-- Attachment #2: a64-bfcvt-trunk-20201102.patch --]
[-- Type: text/x-patch, Size: 4481 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index eb8e6f7b3d8..f494b535a30 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -732,3 +732,8 @@
   VAR1 (UNOP, bfcvtn_q, 0, FP, v8bf)
   VAR1 (BINOP, bfcvtn2, 0, FP, v8bf)
   VAR1 (UNOP, bfcvt, 0, FP, bf)
+
+  /* Implemented by aarch64_{v}bfcvt{_high}<mode>.  */
+  VAR2 (UNOP, vbfcvt, 0, AUTO_FP, v4bf, v8bf)
+  VAR1 (UNOP, vbfcvt_high, 0, AUTO_FP, v8bf)
+  VAR1 (UNOP, bfcvt, 0, AUTO_FP, sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 381a702eba0..030a086d31c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7238,3 +7238,31 @@
   "bfcvt\\t%h0, %s1"
   [(set_attr "type" "f_cvt")]
 )
+
+;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
+(define_insn "aarch64_vbfcvt<mode>"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
+		      UNSPEC_BFCVTN))]
+  "TARGET_BF16_SIMD"
+  "shll\\t%0.4s, %1.4h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_vbfcvt_highv8bf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
+		      UNSPEC_BFCVTN2))]
+  "TARGET_BF16_SIMD"
+  "shll2\\t%0.4s, %1.8h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_bfcvtsf"
+  [(set (match_operand:SF 0 "register_operand" "=w")
+	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
+		    UNSPEC_BFCVT))]
+  "TARGET_BF16_FP"
+  "shl\\t%d0, %d1, #16"
+  [(set_attr "type" "neon_shift_imm")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 984875dcc01..881615498d3 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a)
   return __builtin_aarch64_bfcvtbf (__a);
 }
 
+__extension__ extern __inline float32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtah_f32_bf16 (bfloat16_t __a)
+{
+  return __builtin_aarch64_bfcvtsf (__a);
+}
+
 #pragma GCC pop_options
 
 #endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 95bfa5ebba2..69cccd32786 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35680,6 +35680,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
   return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
 }
 
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_f32_bf16 (bfloat16x4_t __a)
+{
+  return __builtin_aarch64_vbfcvtv4bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvtv8bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvt_highv8bf (__a);
+}
+
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvt_bf16_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
index bbea630b182..47af7c494d9 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a)
 {
   return vcvth_bf16_f32 (a);
 }
+
+/*
+**test_vcvt_f32_bf16:
+**     shll	v0.4s, v0.4h, #16
+**     ret
+*/
+float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a)
+{
+  return vcvt_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_low_f32_bf16:
+**     shll	v0.4s, v0.4h, #16
+**     ret
+*/
+float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_low_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_high_f32_bf16:
+**     shll2	v0.4s, v0.8h, #16
+**     ret
+*/
+float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_high_f32_bf16 (a);
+}
+
+/*
+**test_vcvtah_f32_bf16:
+**     shl	d0, d0, #16
+**     ret
+*/
+float32_t test_vcvtah_f32_bf16 (bfloat16_t a)
+{
+  return vcvtah_f32_bf16 (a);
+}

next prev parent reply	other threads:[~2020-11-02 17:28 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-26 17:25 [PATCH][AArch64] Enable CLI for Armv8.6-a: armv8.6-a, i8mm and bf16 Dennis Zhang
2019-11-29 13:02 ` Richard Sandiford
2019-12-05 15:31   ` Dennis Zhang
2019-12-06 10:22     ` Richard Sandiford
2019-12-12 17:01       ` Dennis Zhang
2019-12-13 10:23         ` Richard Sandiford
2020-10-29 12:19         ` [PATCH][AArch64] ACLE intrinsics: convert from BFloat16 to Float32 Dennis Zhang
2020-10-29 12:28           ` [PATCH][AArch64] ACLE intrinsics: get low/high half from BFloat16 vector Dennis Zhang
2020-10-30 14:07             ` Richard Sandiford
2020-11-03 11:16               ` Dennis Zhang
2020-11-03 14:05                 ` Richard Sandiford
2020-11-03 17:00                   ` Dennis Zhang
2020-11-05 20:07                 ` Christophe Lyon
2020-10-29 17:48           ` [PATCH][AArch64] ACLE intrinsics: convert from BFloat16 to Float32 Richard Sandiford
2020-11-02 17:27             ` Dennis Zhang [this message]
2020-11-02 19:05               ` Richard Sandiford
2020-11-03 13:06                 ` Dennis Zhang
2020-12-10 14:26                   ` [backport gcc-10][AArch64] ACLE bf16 convert Dennis Zhang
2020-12-10 14:34                     ` [backport gcc-10][AArch64] ACLE bf16 get Dennis Zhang
2020-12-11 11:58                       ` Kyrylo Tkachov
2020-12-11 16:31                         ` Dennis Zhang
2020-12-11 11:23                     ` [backport gcc-10][AArch64] ACLE bf16 convert Kyrylo Tkachov
2020-12-11 16:35                       ` Dennis Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4245739e-ef4f-531c-6f50-278e4ec441fe@arm.com \
    --to=dennis.zhang@arm.com \
    --cc=Kyrylo.Tkachov@arm.com \
    --cc=Marcus.Shawcroft@arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=nd@arm.com \
    --cc=richard.sandiford@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).