public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Andrea Corallo <andrea.corallo@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>,
	 Richard Earnshaw <Richard.Earnshaw@arm.com>,  nd <nd@arm.com>
Subject: [PATCH 1/x] arm: Add vld1_lane_bf16 + vld1q_lane_bf16 intrinsics
Date: Mon, 26 Oct 2020 16:58:56 +0100	[thread overview]
Message-ID: <gkrv9exnflr.fsf@arm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 323 bytes --]

Hi all,

I'd like to submit the following patch implementing the bfloat16_t
neon related load intrinsics: vld1_lane_bf16, vld1q_lane_bf16.

Please refer to:
ACLE <https://developer.arm.com/docs/101028/latest>
ISA  <https://developer.arm.com/docs/ddi0596/latest>

Regtested and bootstrapped.

Okay for trunk?

  Andrea


[-- Attachment #2: 0001-arm-Add-vld1_lane_bf16-vldq_lane_bf16-intrinsics.patch --]
[-- Type: text/plain, Size: 5243 bytes --]

From 64e375906abeba1ab14d06106a9714b0371b7105 Mon Sep 17 00:00:00 2001
From: Andrea Corallo <andrea.corallo@arm.com>
Date: Wed, 21 Oct 2020 11:16:01 +0200
Subject: [PATCH] arm: Add vld1_lane_bf16 + vld1q_lane_bf16 intrinsics

gcc/ChangeLog

2020-10-21  Andrea Corallo  <andrea.corallo@arm.com>

	* config/arm/arm_neon_builtins.def: Add to LOAD1LANE v4bf, v8bf.
	* config/arm/arm_neon.h (vld1_lane_bf16, vld1q_lane_bf16): Add
	intrinsics.

gcc/testsuite/ChangeLog

2020-10-21  Andrea Corallo  <andrea.corallo@arm.com>

	* gcc.target/arm/simd/vld1_lane_bf16_1.c: New testcase.
	* gcc.target/arm/simd/vld1_lane_bf16_indices_1.c: Likewise.
	* gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c: Likewise.
---
 gcc/config/arm/arm_neon.h                     | 14 +++++++++++++
 gcc/config/arm/arm_neon_builtins.def          |  4 ++--
 .../gcc.target/arm/simd/vld1_lane_bf16_1.c    | 21 +++++++++++++++++++
 .../arm/simd/vld1_lane_bf16_indices_1.c       | 17 +++++++++++++++
 .../arm/simd/vld1q_lane_bf16_indices_1.c      | 17 +++++++++++++++
 5 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c

diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index aa21730dea0..fcd8020425e 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -19665,6 +19665,20 @@ vld4q_dup_bf16 (const bfloat16_t * __ptr)
   return __rv.__i;
 }
 
+__extension__ extern __inline bfloat16x4_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_lane_bf16 (const bfloat16_t * __a, bfloat16x4_t __b, const int __c)
+{
+  return __builtin_neon_vld1_lanev4bf (__a, __b, __c);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_lane_bf16 (const bfloat16_t * __a, bfloat16x8_t __b, const int __c)
+{
+  return __builtin_neon_vld1_lanev8bf (__a, __b, __c);
+}
+
 #pragma GCC pop_options
 
 #ifdef __cplusplus
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 34c1945c0a1..7cdcd251243 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -312,8 +312,8 @@ VAR1 (TERNOP, vtbx3, v8qi)
 VAR1 (TERNOP, vtbx4, v8qi)
 VAR12 (LOAD1, vld1,
         v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
-VAR10 (LOAD1LANE, vld1_lane,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR12 (LOAD1LANE, vld1_lane,
+        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf)
 VAR10 (LOAD1, vld1_dup,
 	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 VAR12 (STORE1, vst1,
diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c
new file mode 100644
index 00000000000..fa4e45b7217
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+#include "arm_neon.h"
+
+bfloat16x4_t
+test_vld1_lane_bf16 (bfloat16_t *a, bfloat16x4_t b)
+{
+  return vld1_lane_bf16 (a, b, 1);
+}
+
+bfloat16x8_t
+test_vld1q_lane_bf16 (bfloat16_t *a, bfloat16x8_t b)
+{
+  return vld1q_lane_bf16 (a, b, 2);
+}
+
+/* { dg-final { scan-assembler "vld1.16\t{d0\\\[1\\\]}, \\\[r0\\\]" } } */
+/* { dg-final { scan-assembler "vld1.16\t{d0\\\[2\\\]}, \\\[r0\\\]" } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c
new file mode 100644
index 00000000000..c83eb53234d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include "arm_neon.h"
+
+bfloat16x4_t
+test_vld1_lane_bf16 (bfloat16_t *a, bfloat16x4_t b)
+{
+  bfloat16x4_t res;
+  res = vld1_lane_bf16 (a, b, -1);
+  res = vld1_lane_bf16 (a, b, 4);
+  return res;
+}
+
+/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c
new file mode 100644
index 00000000000..8e21e61c9c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include "arm_neon.h"
+
+bfloat16x8_t
+test_vld1q_lane_bf16 (bfloat16_t *a, bfloat16x8_t b)
+{
+  bfloat16x8_t res;
+  res = vld1q_lane_bf16 (a, b, -1);
+  res = vld1q_lane_bf16 (a, b, 8);
+  return res;
+}
+
+/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
-- 
2.20.1


             reply	other threads:[~2020-10-26 15:59 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-26 15:58 Andrea Corallo [this message]
2020-11-02  8:38 ` Andrea Corallo
2020-11-03 10:27 ` Kyrylo Tkachov
2020-11-04 13:29   ` Christophe Lyon
2020-11-04 13:37     ` Christophe Lyon
2020-11-04 14:10       ` Andrea Corallo
2020-11-05 11:11       ` [PATCH] arm: [testcase] Better narrow some bfloat16 testcase Andrea Corallo
2020-11-05 12:03         ` Christophe Lyon
2020-11-05 14:30           ` Andrea Corallo
2020-11-05 20:28             ` Christophe Lyon
2020-11-06 14:06               ` [PATCH V2] " Andrea Corallo
2020-11-07  7:49                 ` Christophe Lyon
2020-11-09 10:05                   ` Andrea Corallo
2020-11-09 11:14                     ` Kyrylo Tkachov
2020-11-09 11:40                       ` Andrea Corallo
2020-11-04 14:20     ` [PATCH 1/x] arm: Add vld1_lane_bf16 + vld1q_lane_bf16 intrinsics Andrea Corallo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=gkrv9exnflr.fsf@arm.com \
    --to=andrea.corallo@arm.com \
    --cc=Kyrylo.Tkachov@arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=nd@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).