public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][AArch64] Vectorise bswap[16,32,64]
@ 2014-04-15 11:25 Kyrill Tkachov
  2014-04-15 17:50 ` Eric Christopher
  0 siblings, 1 reply; 4+ messages in thread
From: Kyrill Tkachov @ 2014-04-15 11:25 UTC (permalink / raw)
  To: GCC Patches; +Cc: Marcus Shawcroft, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 1336 bytes --]

Hi all,

This patch enables aarch64 to vectorise bswap[16,32,64] operations by using the 
AdvancedSIMD forms of the rev[16,32,64] instructions.

The TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION hook is extended to return the 
vectorised forms of __builtin_bswap* where possible and vector bswap patterns 
are added.

I've added the tests in vect.exp and a new effective target check (vect_bswap) 
that can be extended for other targets in the future if they can also vectorise 
these operations. Is that ok?

Bootstrapped and tested aarch64-none-linux-gnu.

Ok for trunk?

Thanks,
Kyrill

2014-04-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64-builtins.c
     (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16,
     BUILT_IN_BSWAP32, BUILT_IN_BSWAP64.
     * config/aarch64/aarch64-simd.md (bswap<mode>): New pattern.
     * config/aarch64/aarch64-simd-builtins.def: Define vector bswap
     builtins.
     * config/aarch64/iterator.md (VDQHSD): New mode iterator.
     (Vrevsuff): New mode attribute.

2014-04-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * lib/target-supports.exp (check_effective_target_vect_bswap): New.
     * gcc.dg/vect/vect-bswap16: New test.
     * gcc.dg/vect/vect-bswap32: Likewise.
     * gcc.dg/vect/vect-bswap64: Likewise.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: aarch64-vbswap.patch --]
[-- Type: text/x-patch; name=aarch64-vbswap.patch, Size: 7670 bytes --]

commit 0d6d820881443a7ce7f9bd51f35aff04866c5e57
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Thu Apr 3 09:22:14 2014 +0100

    [AArch64] vectorise bswap

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 55cfe0a..d839a40 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1086,7 +1086,29 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 
 	    return aarch64_builtin_decls[builtin];
 	  }
-
+	case BUILT_IN_BSWAP16:
+#undef AARCH64_CHECK_BUILTIN_MODE
+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
+  (out_mode == N##Imode && out_n == C \
+   && in_mode == N##Imode && in_n == C)
+	  if (AARCH64_CHECK_BUILTIN_MODE (4, H))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
+	  else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
+	  else
+	    return NULL_TREE;
+	case BUILT_IN_BSWAP32:
+	  if (AARCH64_CHECK_BUILTIN_MODE (2, S))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
+	  else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
+	  else
+	    return NULL_TREE;
+	case BUILT_IN_BSWAP64:
+	  if (AARCH64_CHECK_BUILTIN_MODE (2, D))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
+	  else
+	    return NULL_TREE;
 	default:
 	  return NULL_TREE;
       }
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index c9b7570..e9736da 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -330,6 +330,8 @@
   VAR1 (UNOP, floatunsv4si, 2, v4sf)
   VAR1 (UNOP, floatunsv2di, 2, v2df)
 
+  VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di)
+
   /* Implemented by
      aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>.  */
   BUILTIN_VALL (BINOP, zip1, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 73aee2c..75db3e8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -286,6 +286,14 @@
   [(set_attr "type" "neon_mul_<Vetype><q>")]
 )
 
+(define_insn "bswap<mode>"
+  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
+  [(set_attr "type" "neon_rev<q>")]
+)
+
 (define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
     (mult:VMUL
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index f1339b8..2b5ebd1 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -150,6 +150,9 @@
 ;; Vector modes for H and S types.
 (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI])
 
+;; Vector modes for H, S and D types.
+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
+
 ;; Vector modes for Q, H and S types.
 (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI])
 
@@ -352,6 +355,9 @@
                          (V2DI "2d") (V2SF "2s")
 			 (V4SF "4s") (V2DF "2d")])
 
+(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32")
+                            (V4SI "32") (V2DI "64")])
+
 (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b")
 			 (V4HI ".4h") (V8HI  ".8h")
 			 (V2SI ".2s") (V4SI  ".4s")
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
new file mode 100644
index 0000000..b452a29
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo16 (unsigned short int* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap16 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned short arr[N];
+  unsigned short expect[N];
+  int i;
+
+  for (i < 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap16 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo16 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap32.c b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c
new file mode 100644
index 0000000..f90f853
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo32 (unsigned int* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap32 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned int arr[N];
+  unsigned int expect[N];
+  int i;
+
+  for (i < 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap32 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo32 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap64.c b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c
new file mode 100644
index 0000000..319ab34
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo64 (unsigned long long* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap64 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned long long arr[N];
+  unsigned long long expect[N];
+  int i;
+
+  for (i < 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap64 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo64 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 57b10d0..910b3dc 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3271,6 +3271,24 @@ proc check_effective_target_vect_shift { } {
     return $et_vect_shift_saved
 }
 
+# Return 1 if the target supports vector bswap operations.
+
+proc check_effective_target_vect_bswap { } {
+    global et_vect_bswap_saved
+
+    if [info exists et_vect_bswap_saved] {
+	verbose "check_effective_target_vect_bswap: using cached result" 2
+    } else {
+	set et_vect_bswap_saved 0
+	if { [istarget aarch64*-*-*] } {
+	   set et_vect_bswap_saved 1
+	}
+    }
+
+    verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2
+    return $et_vect_bswap_saved
+}
+
 # Return 1 if the target supports hardware vector shift operation for char.
 
 proc check_effective_target_vect_shift_char { } {

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] Vectorise bswap[16,32,64]
  2014-04-15 11:25 [PATCH][AArch64] Vectorise bswap[16,32,64] Kyrill Tkachov
@ 2014-04-15 17:50 ` Eric Christopher
  2014-04-16  8:26   ` Kyrill Tkachov
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Christopher @ 2014-04-15 17:50 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: GCC Patches, Marcus Shawcroft, Richard Earnshaw

Testcase weirdness?

  for (i < 0; i < N; ++i)
    {
      arr[i] = i;
      expect[i] = __builtin_bswap64 (i);
      if (y) /* Avoid vectorisation.  */
abort ();
    }

i < 0 :)

duplicated in all 3 testcases btw.

-eric


On Tue, Apr 15, 2014 at 4:25 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
> Hi all,
>
> This patch enables aarch64 to vectorise bswap[16,32,64] operations by using
> the AdvancedSIMD forms of the rev[16,32,64] instructions.
>
> The TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION hook is extended to return
> the vectorised forms of __builtin_bswap* where possible and vector bswap
> patterns are added.
>
> I've added the tests in vect.exp and a new effective target check
> (vect_bswap) that can be extended for other targets in the future if they
> can also vectorise these operations. Is that ok?
>
> Bootstrapped and tested aarch64-none-linux-gnu.
>
> Ok for trunk?
>
> Thanks,
> Kyrill
>
> 2014-04-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     * config/aarch64/aarch64-builtins.c
>     (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16,
>     BUILT_IN_BSWAP32, BUILT_IN_BSWAP64.
>     * config/aarch64/aarch64-simd.md (bswap<mode>): New pattern.
>     * config/aarch64/aarch64-simd-builtins.def: Define vector bswap
>     builtins.
>     * config/aarch64/iterator.md (VDQHSD): New mode iterator.
>     (Vrevsuff): New mode attribute.
>
> 2014-04-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     * lib/target-supports.exp (check_effective_target_vect_bswap): New.
>     * gcc.dg/vect/vect-bswap16: New test.
>     * gcc.dg/vect/vect-bswap32: Likewise.
>     * gcc.dg/vect/vect-bswap64: Likewise.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] Vectorise bswap[16,32,64]
  2014-04-15 17:50 ` Eric Christopher
@ 2014-04-16  8:26   ` Kyrill Tkachov
  2014-04-24  8:00     ` Marcus Shawcroft
  0 siblings, 1 reply; 4+ messages in thread
From: Kyrill Tkachov @ 2014-04-16  8:26 UTC (permalink / raw)
  To: Eric Christopher; +Cc: GCC Patches, Marcus Shawcroft, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 1900 bytes --]

On 15/04/14 18:45, Eric Christopher wrote:
> Testcase weirdness?
>
>    for (i < 0; i < N; ++i)
>      {
>        arr[i] = i;
>        expect[i] = __builtin_bswap64 (i);
>        if (y) /* Avoid vectorisation.  */
> abort ();
>      }
>
> i < 0 :)
>
> duplicated in all 3 testcases btw.
Oops, here it is fixed.

Thanks for catching this.
Kyrill

>
> -eric
>
>
> On Tue, Apr 15, 2014 at 4:25 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
>> Hi all,
>>
>> This patch enables aarch64 to vectorise bswap[16,32,64] operations by using
>> the AdvancedSIMD forms of the rev[16,32,64] instructions.
>>
>> The TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION hook is extended to return
>> the vectorised forms of __builtin_bswap* where possible and vector bswap
>> patterns are added.
>>
>> I've added the tests in vect.exp and a new effective target check
>> (vect_bswap) that can be extended for other targets in the future if they
>> can also vectorise these operations. Is that ok?
>>
>> Bootstrapped and tested aarch64-none-linux-gnu.
>>
>> Ok for trunk?
>>
>> Thanks,
>> Kyrill
>>
>> 2014-04-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      * config/aarch64/aarch64-builtins.c
>>      (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16,
>>      BUILT_IN_BSWAP32, BUILT_IN_BSWAP64.
>>      * config/aarch64/aarch64-simd.md (bswap<mode>): New pattern.
>>      * config/aarch64/aarch64-simd-builtins.def: Define vector bswap
>>      builtins.
>>      * config/aarch64/iterator.md (VDQHSD): New mode iterator.
>>      (Vrevsuff): New mode attribute.
>>
>> 2014-04-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      * lib/target-supports.exp (check_effective_target_vect_bswap): New.
>>      * gcc.dg/vect/vect-bswap16: New test.
>>      * gcc.dg/vect/vect-bswap32: Likewise.
>>      * gcc.dg/vect/vect-bswap64: Likewise.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: aarch64-vbswap.patch --]
[-- Type: text/x-patch; name=aarch64-vbswap.patch, Size: 7670 bytes --]

commit 0d6d820881443a7ce7f9bd51f35aff04866c5e57
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Thu Apr 3 09:22:14 2014 +0100

    [AArch64] vectorise bswap

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 55cfe0a..d839a40 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1086,7 +1086,29 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 
 	    return aarch64_builtin_decls[builtin];
 	  }
-
+	case BUILT_IN_BSWAP16:
+#undef AARCH64_CHECK_BUILTIN_MODE
+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
+  (out_mode == N##Imode && out_n == C \
+   && in_mode == N##Imode && in_n == C)
+	  if (AARCH64_CHECK_BUILTIN_MODE (4, H))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
+	  else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
+	  else
+	    return NULL_TREE;
+	case BUILT_IN_BSWAP32:
+	  if (AARCH64_CHECK_BUILTIN_MODE (2, S))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
+	  else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
+	  else
+	    return NULL_TREE;
+	case BUILT_IN_BSWAP64:
+	  if (AARCH64_CHECK_BUILTIN_MODE (2, D))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
+	  else
+	    return NULL_TREE;
 	default:
 	  return NULL_TREE;
       }
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index c9b7570..e9736da 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -330,6 +330,8 @@
   VAR1 (UNOP, floatunsv4si, 2, v4sf)
   VAR1 (UNOP, floatunsv2di, 2, v2df)
 
+  VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di)
+
   /* Implemented by
      aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>.  */
   BUILTIN_VALL (BINOP, zip1, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 73aee2c..75db3e8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -286,6 +286,14 @@
   [(set_attr "type" "neon_mul_<Vetype><q>")]
 )
 
+(define_insn "bswap<mode>"
+  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
+  [(set_attr "type" "neon_rev<q>")]
+)
+
 (define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
     (mult:VMUL
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index f1339b8..2b5ebd1 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -150,6 +150,9 @@
 ;; Vector modes for H and S types.
 (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI])
 
+;; Vector modes for H, S and D types.
+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
+
 ;; Vector modes for Q, H and S types.
 (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI])
 
@@ -352,6 +355,9 @@
                          (V2DI "2d") (V2SF "2s")
 			 (V4SF "4s") (V2DF "2d")])
 
+(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32")
+                            (V4SI "32") (V2DI "64")])
+
 (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b")
 			 (V4HI ".4h") (V8HI  ".8h")
 			 (V2SI ".2s") (V4SI  ".4s")
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
new file mode 100644
index 0000000..b452a29
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo16 (unsigned short int* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap16 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned short arr[N];
+  unsigned short expect[N];
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap16 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo16 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap32.c b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c
new file mode 100644
index 0000000..f90f853
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo32 (unsigned int* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap32 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned int arr[N];
+  unsigned int expect[N];
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap32 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo32 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap64.c b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c
new file mode 100644
index 0000000..319ab34
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo64 (unsigned long long* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap64 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned long long arr[N];
+  unsigned long long expect[N];
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap64 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo64 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 57b10d0..910b3dc 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3271,6 +3271,24 @@ proc check_effective_target_vect_shift { } {
     return $et_vect_shift_saved
 }
 
+# Return 1 if the target supports vector bswap operations.
+
+proc check_effective_target_vect_bswap { } {
+    global et_vect_bswap_saved
+
+    if [info exists et_vect_bswap_saved] {
+	verbose "check_effective_target_vect_bswap: using cached result" 2
+    } else {
+	set et_vect_bswap_saved 0
+	if { [istarget aarch64*-*-*] } {
+	   set et_vect_bswap_saved 1
+	}
+    }
+
+    verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2
+    return $et_vect_bswap_saved
+}
+
 # Return 1 if the target supports hardware vector shift operation for char.
 
 proc check_effective_target_vect_shift_char { } {

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] Vectorise bswap[16,32,64]
  2014-04-16  8:26   ` Kyrill Tkachov
@ 2014-04-24  8:00     ` Marcus Shawcroft
  0 siblings, 0 replies; 4+ messages in thread
From: Marcus Shawcroft @ 2014-04-24  8:00 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: GCC Patches

On 16 April 2014 09:12, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
> On 15/04/14 18:45, Eric Christopher wrote:
>>
>> Testcase weirdness?
>>
>>    for (i < 0; i < N; ++i)
>>      {
>>        arr[i] = i;
>>        expect[i] = __builtin_bswap64 (i);
>>        if (y) /* Avoid vectorisation.  */
>> abort ();
>>      }
>>
>> i < 0 :)
>>
>> duplicated in all 3 testcases btw.
>
> Oops, here it is fixed.
>
> Thanks for catching this.
> Kyrill

Fixed version OK /Marcus

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-04-24  7:54 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-15 11:25 [PATCH][AArch64] Vectorise bswap[16,32,64] Kyrill Tkachov
2014-04-15 17:50 ` Eric Christopher
2014-04-16  8:26   ` Kyrill Tkachov
2014-04-24  8:00     ` Marcus Shawcroft

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).