public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][GCC][ARM][AArch64] Testsuite framework changes and execution tests [Patch (8/8)]
@ 2017-09-01 13:24 Tamar Christina
  2017-09-04 12:40 ` Christophe Lyon
  0 siblings, 1 reply; 10+ messages in thread
From: Tamar Christina @ 2017-09-01 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, james.greenhalgh, Richard.Earnshaw, Marcus.Shawcroft

[-- Attachment #1: Type: text/plain, Size: 913 bytes --]

Hi All,

This patch enables the execution runs for Dot product and also
adds the feature tests.

The ARMv8.2-a Dot Product instructions only support 8-bit
element vectorization.

Dot product is available from ARMv8.2-a and onwards.

Regtested and bootstrapped on aarch64-none-elf and
arm-none-eabi and no issues.

Ok for trunk?

gcc/testsuite
2017-09-01  Tamar Christina  <tamar.christina@arm.com>

	* lib/target-supports.exp
	(check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): New.
	(check_effective_target_arm_v8_2a_dotprod_neon_ok): New.
	(add_options_for_arm_v8_2a_dotprod_neon): New.
	(check_effective_target_arm_v8_2a_dotprod_neon_hw): New.
	(check_effective_target_vect_sdot_qi): New.
	(check_effective_target_vect_udot_qi): New.
	* gcc.target/arm/simd/vdot-exec.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New.
	* gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon.

-- 

[-- Attachment #2: 8007-diff.patch --]
[-- Type: text/x-diff, Size: 11599 bytes --]

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index e6313dc031ef5b2b5a72180bccf1e876812efe48..bb6fe68a460dd6a699a76953e221028a15997001 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1678,6 +1678,17 @@ ARM target supports executing instructions from ARMv8.2 with the FP16
 extension.  Some multilibs may be incompatible with these options.
 Implies arm_v8_2a_fp16_neon_ok and arm_v8_2a_fp16_scalar_hw.
 
+@item arm_v8_2a_dotprod_neon_ok
+@anchor{arm_v8_2a_dotprod_neon_ok}
+ARM target supports options to generate instructions from ARMv8.2 with
+the Dot Product extension. Some multilibs may be incompatible with these
+options.
+
+@item arm_v8_2a_dotprod_neon_hw
+ARM target supports executing instructions from ARMv8.2 with the Dot
+Product extension. Some multilibs may be incompatible with these options.
+Implies arm_v8_2a_dotprod_neon_ok.
+
 @item arm_prefer_ldrd_strd
 ARM target prefers @code{LDRD} and @code{STRD} instructions over
 @code{LDM} and @code{STM} instructions.
@@ -2269,6 +2280,11 @@ supported by the target; see the
 @ref{arm_v8_2a_fp16_neon_ok,,arm_v8_2a_fp16_neon_ok} effective target
 keyword.
 
+@item arm_v8_2a_dotprod_neon
+Add options for ARMv8.2 with Adv.SIMD Dot Product support, if this is
+supported by the target; see the
+@ref{arm_v8_2a_dotprod_neon_ok} effective target keyword.
+
 @item bind_pic_locally
 Add the target-specific flags needed to enable functions to bind
 locally when using pic/PIC passes in the testsuite.
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c
new file mode 100644
index 0000000000000000000000000000000000000000..96d7f0ebc4fd89e966a17b2d7bb6b17e4b511c67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c
@@ -0,0 +1,75 @@
+/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */
+
+#include <arm_neon.h>
+
+extern void abort();
+
+#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2
+#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ }
+#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \
+	ARR(f, x, t1, r1);		    \
+	ARR(f, y, t2, r2);		    \
+	t3 f##_##r = {0};		    \
+	f##_##r = f (f##_##r, f##_##x, f##_##y);  \
+	if (f##_##r[0] != n1 || f##_##r[1] != n2)   \
+	  abort ();
+
+#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \
+	ARR(f, x, t1, r1);		    \
+	ARR(f, y, t2, r2);		    \
+	t3 f##_##rx = {0};		    \
+	f##_##rx = f (f##_##rx, f##_##x, f##_##y, 0);  \
+	if (f##_##rx[0] != n1 || f##_##rx[1] != n2)   \
+	  abort ();				    \
+	t3 f##_##rx1 = {0};			    \
+	f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, 1);  \
+	if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4)   \
+	  abort ();
+
+#define Px(n1,n2,n3,n4) P(n1,n2),P(n3,n4)
+#define TEST_LANEQ(t1, t2, t3, f, r1, r2, n1, n2, n3, n4, n5, n6, n7, n8) \
+	ARR(f, x, t1, r1);		    \
+	ARR(f, y, t2, r2);		    \
+	t3 f##_##rx = {0};		    \
+	f##_##rx = f (f##_##rx, f##_##x, f##_##y, 0);  \
+	if (f##_##rx[0] != n1 || f##_##rx[1] != n2)   \
+	  abort ();				    \
+	t3 f##_##rx1 = {0};			    \
+	f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, 1);  \
+	if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4)   \
+	  abort (); \
+	t3 f##_##rx2 = {0};				    \
+	f##_##rx2 = f (f##_##rx2, f##_##x, f##_##y, 2);  \
+	if (f##_##rx2[0] != n5 || f##_##rx2[1] != n6)   \
+	  abort ();				    \
+	t3 f##_##rx3 = {0};			    \
+	f##_##rx3 = f (f##_##rx3, f##_##x, f##_##y, 3);  \
+	if (f##_##rx3[0] != n7 || f##_##rx3[1] != n8)   \
+	  abort ();
+
+int
+main()
+{
+  TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24);
+  TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24);
+
+  TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24);
+  TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24);
+
+  TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
+  TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
+
+  TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
+  TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
+
+  TEST_LANEQ (uint8x8_t, uint8x16_t, uint32x2_t, vdot_laneq_u32, P(1,2), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32);
+  TEST_LANEQ (int8x8_t, int8x16_t, int32x2_t, vdot_laneq_s32, P(1,2), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32);
+
+  TEST_LANEQ (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_laneq_u32, Px(1,2,2,1), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32);
+  TEST_LANEQ (int8x16_t, int8x16_t, int32x4_t, vdotq_laneq_s32, Px(1,2,2,1), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c
new file mode 100644
index 0000000000000000000000000000000000000000..bb5fb114f9b3ac975b7ae9b7ef0f101a891c0c2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3" } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */
+/* { dg-add-options arm_v8_2a_dotprod_neon }  */
+
+#include <arm_neon.h>
+
+extern void abort();
+
+#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2
+#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ }
+#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \
+	ARR(f, x, t1, r1);		    \
+	ARR(f, y, t2, r2);		    \
+	t3 f##_##r = {0};		    \
+	f##_##r = f (f##_##r, f##_##x, f##_##y);  \
+	if (f##_##r[0] != n1 || f##_##r[1] != n2)   \
+	  abort ();
+
+#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2
+#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ }
+#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \
+	ARR(f, x, t1, r1);		    \
+	ARR(f, y, t2, r2);		    \
+	t3 f##_##rx = {0};		    \
+	f##_##rx = f (f##_##rx, f##_##x, f##_##y, 0);  \
+	if (f##_##rx[0] != n1 || f##_##rx[1] != n2)   \
+	  abort ();				    \
+	t3 f##_##rx1 = {0};			    \
+	f##_##rx1 =  f (f##_##rx1, f##_##x, f##_##y, 1);  \
+	if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4)   \
+	  abort (); \
+
+int
+main()
+{
+  TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24);
+  TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24);
+
+  TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24);
+  TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24);
+
+  TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
+
+  TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
+
+  TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24);
+  TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24);
+
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5219fbf4671e83a6fa7affdab926115e8a23f9cb..77d75b06a74b7a5925b6616d1880a5ec598d9f7a 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4217,6 +4217,48 @@ proc check_effective_target_arm_v8_2a_fp16_neon_ok { } {
 		check_effective_target_arm_v8_2a_fp16_neon_ok_nocache]
 }
 
+# Return 1 if the target supports ARMv8.2 Adv.SIMD Dot Product
+# instructions, 0 otherwise.  The test is valid for ARM and for AArch64.
+# Record the command line options needed.
+
+proc check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } {
+    global et_arm_v8_2a_dotprod_neon_flags
+    set et_arm_v8_2a_dotprod_neon_flags ""
+
+    if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } {
+        return 0;
+    }
+
+    # Iterate through sets of options to find the compiler flags that
+    # need to be added to the -march option.
+    foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" "-mfloat-abi=hard -mfpu=neon-fp-armv8"} {
+        if { [check_no_compiler_messages_nocache \
+                  arm_v8_2a_dotprod_neon_ok object {
+            #if !defined (__ARM_FEATURE_DOTPROD)
+            #error "__ARM_FEATURE_DOTPROD not defined"
+            #endif
+        } "$flags -march=armv8.2-a+dotprod"] } {
+            set et_arm_v8_2a_dotprod_neon_flags "$flags -march=armv8.2-a+dotprod"
+            return 1
+        }
+    }
+
+    return 0;
+}
+
+proc check_effective_target_arm_v8_2a_dotprod_neon_ok { } {
+    return [check_cached_effective_target arm_v8_2a_dotprod_neon_ok \
+                check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache]
+}
+
+proc add_options_for_arm_v8_2a_dotprod_neon { flags } {
+    if { ! [check_effective_target_arm_v8_2a_dotprod_neon_ok] } {
+        return "$flags"
+    }
+    global et_arm_v8_2a_dotprod_neon_flags
+    return "$flags $et_arm_v8_2a_dotprod_neon_flags"
+}
+
 # Return 1 if the target supports executing ARMv8 NEON instructions, 0
 # otherwise.
 
@@ -4354,6 +4396,42 @@ proc check_effective_target_arm_v8_2a_fp16_neon_hw { } {
     } [add_options_for_arm_v8_2a_fp16_neon ""]]
 }
 
+# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.2
+# with the Dot Product extension, 0 otherwise.  The test is valid for ARM and for
+# AArch64.
+
+proc check_effective_target_arm_v8_2a_dotprod_neon_hw { } {
+    if { ![check_effective_target_arm_v8_2a_dotprod_neon_ok] } {
+        return 0;
+    }
+    return [check_runtime arm_v8_2a_dotprod_neon_hw_available {
+        #include "arm_neon.h"
+        int
+        main (void)
+        {
+
+	  uint32x2_t results = {0,0};
+	  uint8x8_t a = {1,1,1,1,2,2,2,2};
+	  uint8x8_t b = {2,2,2,2,3,3,3,3};
+
+          #ifdef __ARM_ARCH_ISA_A64
+          asm ("udot %0.2s, %1.8b, %2.8b"
+               : "=w"(results)
+               : "w"(a), "w"(b)
+               : /* No clobbers.  */);
+
+	  #elif __ARM_ARCH >= 8
+          asm ("vudot.u8 %P0, %P1, %P2"
+               : "=w"(results)
+               : "w"(a), "w"(b)
+               : /* No clobbers.  */);
+          #endif
+
+          return (results[0] == 8 && results[1] == 24) ? 1 : 0;
+        }
+    } [add_options_for_arm_v8_2a_dotprod_neon ""]]
+}
+
 # Return 1 if this is a ARM target with NEON enabled.
 
 proc check_effective_target_arm_neon { } {
@@ -5619,6 +5697,8 @@ proc check_effective_target_vect_sdot_qi { } {
     } else {
 	set et_vect_sdot_qi_saved($et_index) 0
 	if { [istarget ia64-*-*]
+	     || [istarget aarch64*-*-*]
+	     || [istarget arm*-*-*]
 	     || ([istarget mips*-*-*]
 		 && [et-is-effective-target mips_msa]) } {
             set et_vect_udot_qi_saved 1
@@ -5643,6 +5723,8 @@ proc check_effective_target_vect_udot_qi { } {
     } else {
 	set et_vect_udot_qi_saved($et_index) 0
         if { [istarget powerpc*-*-*]
+	     || [istarget aarch64*-*-*]
+	     || [istarget arm*-*-*]
 	     || [istarget ia64-*-*]
 	     || ([istarget mips*-*-*]
 		 && [et-is-effective-target mips_msa]) } {
@@ -7952,7 +8034,7 @@ proc check_effective_target_aarch64_tiny { } {
 # Create functions to check that the AArch64 assembler supports the
 # various architecture extensions via the .arch_extension pseudo-op.
 
-foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse"} {
+foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod"} {
     eval [string map [list FUNC $aarch64_ext] {
 	proc check_effective_target_aarch64_asm_FUNC_ok { } {
 	  if { [istarget aarch64*-*-*] } {


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-11-15 11:04 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-01 13:24 [PATCH][GCC][ARM][AArch64] Testsuite framework changes and execution tests [Patch (8/8)] Tamar Christina
2017-09-04 12:40 ` Christophe Lyon
2017-09-04 13:01   ` Tamar Christina
2017-10-06 12:45     ` Tamar Christina
2017-10-23  9:38       ` Tamar Christina
2017-10-24 15:28       ` James Greenhalgh
2017-10-26  7:21         ` Tamar Christina
2017-10-26  8:12           ` James Greenhalgh
2017-10-26  9:05       ` Kyrill Tkachov
2017-11-15 11:11       ` Kyrill Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).