public inbox for gcc-cvs@sourceware.org
* [gcc r12-4923] AArch64: Lower intrinsics shift to GIMPLE when possible.
@ 2021-11-04 17:37 Tamar Christina
From: Tamar Christina @ 2021-11-04 17:37 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1b4a63593bc6b9770789816b205039fdf3cfd3fc

commit r12-4923-g1b4a63593bc6b9770789816b205039fdf3cfd3fc
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Thu Nov 4 17:36:08 2021 +0000

    AArch64: Lower intrinsics shift to GIMPLE when possible.
    
    This lowers shifts to GIMPLE when the C interpretation of the shift
    operations matches that of AArch64.
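    
    For example (an illustrative sketch drawn from the new signbit-2.c test
    below), once the shift is exposed in GIMPLE, the generic middle-end folds
    can simplify chains of intrinsics:
    
    int32x2_t foo1 (int32x2_t a)
    {
      return vshr_n_s32 (vneg_s32 (a), 31);
    }
    
    now folds to a single compare (cmgt) instead of a negate followed by a
    shift.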
    
    In C, shifting right by BITSIZE (the bit width of the element type) is
    undefined, but the behavior is defined in AArch64.  Additionally, negative
    left shifts are undefined in C, whereas the register variants of the
    instruction (SSHL, USHL) define them as right shifts.
    
    Since we have a right shift by immediate, I rewrite those cases into right
    shifts.
    
    So:
    
    int64x1_t foo3 (int64x1_t a)
    {
      return vshl_s64 (a, vdup_n_s64(-6));
    }
    
    produces:
    
    foo3:
            sshr    d0, d0, 6
            ret
    
    instead of:
    
    foo3:
            mov     x0, -6
            fmov    d1, x0
            sshl    d0, d0, d1
            ret
    
    This behavior isn't specifically mentioned for a left shift by immediate,
    but I believe that is only the case because we do have a right shift by
    immediate but not a right shift by register.  As such I do the same for
    left shifts by immediate.
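    
    Similarly, an in-range left shift by immediate (a hypothetical example,
    not taken from the commit) such as:
    
    int32x4_t foo (int32x4_t a)
    {
      return vshlq_n_s32 (a, 3);
    }
    
    is now lowered to a plain GIMPLE shift (a << 3); the final code is still
    a single shl, but the shift becomes visible to the middle-end optimizers.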
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-builtins.c
            (aarch64_general_gimple_fold_builtin): Add ashl, sshl, ushl, ashr,
            ashr_simd, lshr, lshr_simd.
            * config/aarch64/aarch64-simd-builtins.def (lshr): Use USHIFTIMM.
            * config/aarch64/arm_neon.h (vshr_n_u8, vshr_n_u16, vshr_n_u32,
            vshrq_n_u8, vshrq_n_u16, vshrq_n_u32, vshrq_n_u64): Fix type hack.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-1.c: New test.
            * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-2.c: New test.
            * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-3.c: New test.
            * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-4.c: New test.
            * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-5.c: New test.
            * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-6.c: New test.
            * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-7.c: New test.
            * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-8.c: New test.
            * gcc.target/aarch64/signbit-2.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.c              | 48 ++++++++++++++++++++++
 gcc/config/aarch64/aarch64-simd-builtins.def       |  2 +-
 gcc/config/aarch64/arm_neon.h                      | 14 +++----
 .../aarch64/advsimd-intrinsics/vshl-opt-1.c        | 11 +++++
 .../aarch64/advsimd-intrinsics/vshl-opt-2.c        | 11 +++++
 .../aarch64/advsimd-intrinsics/vshl-opt-3.c        | 11 +++++
 .../aarch64/advsimd-intrinsics/vshl-opt-4.c        | 11 +++++
 .../aarch64/advsimd-intrinsics/vshl-opt-5.c        | 12 ++++++
 .../aarch64/advsimd-intrinsics/vshl-opt-6.c        | 10 +++++
 .../aarch64/advsimd-intrinsics/vshl-opt-7.c        | 12 ++++++
 .../aarch64/advsimd-intrinsics/vshl-opt-8.c        | 10 +++++
 gcc/testsuite/gcc.target/aarch64/signbit-2.c       | 36 ++++++++++++++++
 12 files changed, 180 insertions(+), 8 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ed91c2b0997..5053bf0f8fd 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2719,6 +2719,54 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
 					       1, args[0]);
 	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
 	break;
+      BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE)
+	if (TREE_CODE (args[1]) == INTEGER_CST
+	    && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
+	  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+					  LSHIFT_EXPR, args[0], args[1]);
+	break;
+      BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE)
+      BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE)
+	{
+	  tree cst = args[1];
+	  tree ctype = TREE_TYPE (cst);
+	  /* Left shifts can be both scalar or vector, e.g. uint64x1_t is
+	     treated as a scalar type not a vector one.  */
+	  if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
+	    {
+	      wide_int wcst = wi::to_wide (cst);
+	      tree unit_ty = TREE_TYPE (cst);
+
+	      wide_int abs_cst = wi::abs (wcst);
+	      if (wi::geu_p (abs_cst, element_precision (args[0])))
+		break;
+
+	      if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
+		{
+		  tree final_cst;
+		  final_cst = wide_int_to_tree (unit_ty, abs_cst);
+		  if (TREE_CODE (cst) != INTEGER_CST)
+		    final_cst = build_uniform_cst (ctype, final_cst);
+
+		  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+						  RSHIFT_EXPR, args[0],
+						  final_cst);
+		}
+	      else
+		new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+						LSHIFT_EXPR, args[0], args[1]);
+	    }
+	}
+	break;
+      BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
+      VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
+      BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
+      VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
+	if (TREE_CODE (args[1]) == INTEGER_CST
+	    && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
+	  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+					  RSHIFT_EXPR, args[0], args[1]);
+	break;
       BUILTIN_GPF (BINOP, fmulx, 0, ALL)
 	{
 	  gcc_assert (nargs == 2);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 6546e914919..4a7e2cf4125 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -436,7 +436,7 @@
 
   BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
   VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
-  BUILTIN_VDQ_I (SHIFTIMM, lshr, 3, NONE)
+  BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
   VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
   /* Implemented by aarch64_<sur>shr_n<mode>.  */
   BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0, NONE)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 9838c39df60..398a2e3a021 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -24128,21 +24128,21 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshr_n_u8 (uint8x8_t __a, const int __b)
 {
-  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_lshrv8qi_uus (__a, __b);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshr_n_u16 (uint16x4_t __a, const int __b)
 {
-  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_lshrv4hi_uus (__a, __b);
 }
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshr_n_u32 (uint32x2_t __a, const int __b)
 {
-  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_lshrv2si_uus (__a, __b);
 }
 
 __extension__ extern __inline uint64x1_t
@@ -24184,28 +24184,28 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrq_n_u8 (uint8x16_t __a, const int __b)
 {
-  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
+  return __builtin_aarch64_lshrv16qi_uus (__a, __b);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrq_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_lshrv8hi_uus (__a, __b);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrq_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_lshrv4si_uus (__a, __b);
 }
 
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrq_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_lshrv2di_uus (__a, __b);
 }
 
 __extension__ extern __inline int64_t
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-1.c
new file mode 100644
index 00000000000..6baed3a19a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-1.c
@@ -0,0 +1,11 @@
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+uint8x8_t foo (uint8x8_t a)
+{
+  return vshr_n_u8 (a, 2);
+}
+
+/* { dg-final { scan-assembler-times {\tushr\t.+, 2} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-2.c
new file mode 100644
index 00000000000..df46ae31b19
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-2.c
@@ -0,0 +1,11 @@
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+int16x8_t foo (int16x8_t a)
+{
+  return vshrq_n_s16 (a, 8);
+}
+
+/* { dg-final { scan-assembler-times {\tsshr\t.+, 8} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-3.c
new file mode 100644
index 00000000000..86fcc7ae7f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-3.c
@@ -0,0 +1,11 @@
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+int16x8_t foo (int16x8_t a)
+{
+  return vshrq_n_s16 (a, 16);
+}
+
+/* { dg-final { scan-assembler-times {\tsshr\t.+, 16} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-4.c
new file mode 100644
index 00000000000..11f8fec6d02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-4.c
@@ -0,0 +1,11 @@
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+int64x1_t foo (int64x1_t a)
+{
+  return vshl_s64 (a, vdup_n_s64(80));
+}
+
+/* { dg-final { scan-assembler-times {\tsshl\t.+, d[0-9]+} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-5.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-5.c
new file mode 100644
index 00000000000..e140e5f5fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-5.c
@@ -0,0 +1,12 @@
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-skip-if "no optimizations" { *-*-* } { "-O0" } { "" } } */
+
+#include <arm_neon.h>
+
+int64x1_t foo (int64x1_t a)
+{
+  return vshl_s64 (a, vdup_n_s64(-6));
+}
+
+/* { dg-final { scan-assembler-times {\tsshr\t.+, 6} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-6.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-6.c
new file mode 100644
index 00000000000..442e3163237
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-6.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+int32x4_t foo (int32x4_t x) {
+  return vshlq_s32(x, vdupq_n_s32(256));
+}
+
+/* { dg-final { scan-assembler-times {\tsshl\t.+, v[0-9].4s} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-7.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-7.c
new file mode 100644
index 00000000000..20b4920912b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-7.c
@@ -0,0 +1,12 @@
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-skip-if "no optimizations" { *-*-* } { "-O0" } { "" } } */
+
+#include <arm_neon.h>
+
+int32x4_t foo (int32x4_t x) {
+  return vshlq_s32(vdupq_n_s32(1), vdupq_n_s32(10));
+}
+
+/* { dg-final { scan-assembler-not {\tsshl\t} } } */
+/* { dg-final { scan-assembler-times {\tmovi\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-8.c
new file mode 100644
index 00000000000..f586aa79cd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl-opt-8.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble { target aarch64*-*-* } } */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+int32x4_t foo (int32x4_t x) {
+  return vshlq_s32(x, vdupq_n_s32(-64));
+}
+
+/* { dg-final { scan-assembler-times {\tsshl\t.+, v[0-9]+.4s} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-2.c b/gcc/testsuite/gcc.target/aarch64/signbit-2.c
new file mode 100644
index 00000000000..e4e9afc8543
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbit-2.c
@@ -0,0 +1,36 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps" } */
+
+#include <arm_neon.h>
+
+int32x2_t foo1 (int32x2_t a)
+{
+  return vshr_n_s32 (vneg_s32 (a), 31);
+}
+
+int32x4_t foo2 (int32x4_t a)
+{
+  return vshrq_n_s32 (vnegq_s32 (a), 31);
+}
+
+int16x8_t foo3 (int16x8_t a)
+{
+  return vshrq_n_s16 (vnegq_s16 (a), 15);
+}
+
+int16x4_t foo4 (int16x4_t a)
+{
+  return vshr_n_s16 (vneg_s16 (a), 15);
+}
+
+int8x16_t foo5 (int8x16_t a)
+{
+  return vshrq_n_s8 (vnegq_s8 (a), 7);
+}
+
+int8x8_t foo6 (int8x8_t a)
+{
+  return vshr_n_s8 (vneg_s8 (a), 7);
+}
+
+/* { dg-final { scan-assembler-times {\tcmgt\t} 6 } } */

