public inbox for gcc-patches@gcc.gnu.org
* [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
@ 2018-07-20  9:37 Vlad Lazar
  2018-07-23 16:21 ` Sudakshina Das
  2018-07-31 21:48 ` James Greenhalgh
  0 siblings, 2 replies; 10+ messages in thread
From: Vlad Lazar @ 2018-07-20  9:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, Richard Earnshaw, James Greenhalgh

Hi,

The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
(https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)

Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.

OK for trunk?

Thanks,
Vlad

gcc/
2018-07-02  Vlad Lazar  <vlad.lazar@arm.com>

	* config/aarch64/arm_neon.h (vabsd_s64, vnegd_s64): New.

gcc/testsuite/
2018-07-02  Vlad Lazar  <vlad.lazar@arm.com>

	* gcc.target/aarch64/scalar_intrinsics.c (test_vabsd_s64, test_vnegd_s64): New.

---

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2d18400040f031dfcdaf60269ad484647804e1be..19e22431a85bcd09d0ea759b42b0a52420b6c43c 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11822,6 +11822,13 @@ vabsq_s64 (int64x2_t __a)
    return __builtin_aarch64_absv2di (__a);
  }
  
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vabsd_s64 (int64_t __a)
+{
+  return __builtin_aarch64_absdi (__a);
+}
+
  /* vadd */
  
  __extension__ extern __inline int64_t
@@ -22907,6 +22914,12 @@ vneg_s64 (int64x1_t __a)
    return -__a;
  }
  
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return -__a;
+}
  __extension__ extern __inline float32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vnegq_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index ea29066e369b967d0781d31c8a5208bda9e4f685..45afeec373971838e0cd107038b4aa51a2d4998f 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -603,6 +603,14 @@ test_vsqaddd_u64 (uint64_t a, int64_t b)
    return vsqaddd_u64 (a, b);
  }
  
+/* { dg-final { scan-assembler-times "\\tabs\\td\[0-9\]+" 1 } }  */
+
+int64_t
+test_vabsd_s64 (int64_t a)
+{
+  return vabsd_s64 (a);
+}
+
  /* { dg-final { scan-assembler-times "\\tsqabs\\tb\[0-9\]+" 1 } } */
  
  int8_t
@@ -627,6 +635,14 @@ test_vqabss_s32 (int32_t a)
    return vqabss_s32 (a);
  }
  
+/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
+
+int64_t
+test_vnegd_s64 (int64_t a)
+{
+  return vnegd_s64 (a);
+}
+
  /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
  
  int8_t


* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-07-20  9:37 [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64 Vlad Lazar
@ 2018-07-23 16:21 ` Sudakshina Das
  2018-07-31 21:48 ` James Greenhalgh
  1 sibling, 0 replies; 10+ messages in thread
From: Sudakshina Das @ 2018-07-23 16:21 UTC (permalink / raw)
  To: Vlad Lazar, gcc-patches; +Cc: nd, Richard Earnshaw, James Greenhalgh

Hi Vlad


On Friday 20 July 2018 10:37 AM, Vlad Lazar wrote:
> Hi,
>
> The patch adds implementations for the NEON intrinsics vabsd_s64 and 
> vnegd_s64.
> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification) 
>
>
> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no 
> regressions.
>
> OK for trunk?

Thanks for doing this. This looks good to me, but you will need a maintainer's
approval.

Thanks
Sudi
>
> Thanks,
> Vlad
>
> gcc/
> 2018-07-02  Vlad Lazar  <vlad.lazar@arm.com>
>
>     * config/aarch64/arm_neon.h (vabsd_s64, vnegd_s64): New.
>
> gcc/testsuite/
> 2018-07-02  Vlad Lazar  <vlad.lazar@arm.com>
>
>     * gcc.target/aarch64/scalar_intrinsics.c (test_vabsd_s64, 
> test_vnegd_s64): New.
>
> ---
>
> diff --git a/gcc/config/aarch64/arm_neon.h 
> b/gcc/config/aarch64/arm_neon.h
> index 
> 2d18400040f031dfcdaf60269ad484647804e1be..19e22431a85bcd09d0ea759b42b0a52420b6c43c 
> 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -11822,6 +11822,13 @@ vabsq_s64 (int64x2_t __a)
>    return __builtin_aarch64_absv2di (__a);
>  }
>
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vabsd_s64 (int64_t __a)
> +{
> +  return __builtin_aarch64_absdi (__a);
> +}
> +
>  /* vadd */
>
>  __extension__ extern __inline int64_t
> @@ -22907,6 +22914,12 @@ vneg_s64 (int64x1_t __a)
>    return -__a;
>  }
>
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vnegd_s64 (int64_t __a)
> +{
> +  return -__a;
> +}
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vnegq_f32 (float32x4_t __a)
> diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c 
> b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> index 
> ea29066e369b967d0781d31c8a5208bda9e4f685..45afeec373971838e0cd107038b4aa51a2d4998f 
> 100644
> --- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> +++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> @@ -603,6 +603,14 @@ test_vsqaddd_u64 (uint64_t a, int64_t b)
>    return vsqaddd_u64 (a, b);
>  }
>
> +/* { dg-final { scan-assembler-times "\\tabs\\td\[0-9\]+" 1 } } */
> +
> +int64_t
> +test_vabsd_s64 (int64_t a)
> +{
> +  return vabsd_s64 (a);
> +}
> +
>  /* { dg-final { scan-assembler-times "\\tsqabs\\tb\[0-9\]+" 1 } } */
>
>  int8_t
> @@ -627,6 +635,14 @@ test_vqabss_s32 (int32_t a)
>    return vqabss_s32 (a);
>  }
>
> +/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
> +
> +int64_t
> +test_vnegd_s64 (int64_t a)
> +{
> +  return vnegd_s64 (a);
> +}
> +
>  /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
>
>  int8_t


* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-07-20  9:37 [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64 Vlad Lazar
  2018-07-23 16:21 ` Sudakshina Das
@ 2018-07-31 21:48 ` James Greenhalgh
  2018-08-01 11:53   ` Kyrill Tkachov
  2018-08-01 12:14   ` Vlad Lazar
  1 sibling, 2 replies; 10+ messages in thread
From: James Greenhalgh @ 2018-07-31 21:48 UTC (permalink / raw)
  To: Vlad Lazar; +Cc: gcc-patches, nd, Richard Earnshaw

On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
> Hi,
> 
> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
> 
> OK for trunk?
> 
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vnegd_s64 (int64_t __a)
> +{
> +  return -__a;
> +}

Does this give the correct behaviour for the minimum value of int64_t? That
would be undefined behaviour in C, but well-defined under ACLE.

Thanks,
James


* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-07-31 21:48 ` James Greenhalgh
@ 2018-08-01 11:53   ` Kyrill Tkachov
  2018-08-01 12:14   ` Vlad Lazar
  1 sibling, 0 replies; 10+ messages in thread
From: Kyrill Tkachov @ 2018-08-01 11:53 UTC (permalink / raw)
  To: James Greenhalgh, Vlad Lazar; +Cc: gcc-patches, nd, Richard Earnshaw


On 31/07/18 22:48, James Greenhalgh wrote:
> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
> > Hi,
> >
> > The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
> > (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
> >
> > Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
> >
> > OK for trunk?
> >
> > +__extension__ extern __inline int64_t
> > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > +vnegd_s64 (int64_t __a)
> > +{
> > +  return -__a;
> > +}
>
> Does this give the correct behaviour for the minimum value of int64_t? That
> would be undefined behaviour in C, but well-defined under ACLE.
>

Similar intrinsics such as vneg_s8, vneg_s16 etc use the same implementation
(though on vector types) and the test in the testsuite for them (gcc.target/aarch64/vneg_s.c)
has cases for these limit values, so it seems to work there.
Does the fact that those are using vector types rather than the scalar int64_t matter?

Kyrill

> Thanks,
> James
>


* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-07-31 21:48 ` James Greenhalgh
  2018-08-01 11:53   ` Kyrill Tkachov
@ 2018-08-01 12:14   ` Vlad Lazar
  2018-08-01 17:35     ` James Greenhalgh
  1 sibling, 1 reply; 10+ messages in thread
From: Vlad Lazar @ 2018-08-01 12:14 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd, Richard Earnshaw

On 31/07/18 22:48, James Greenhalgh wrote:
> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
>> Hi,
>>
>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
>>
>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>>
>> OK for trunk?
>>
>> +__extension__ extern __inline int64_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vnegd_s64 (int64_t __a)
>> +{
>> +  return -__a;
>> +}
> 
> Does this give the correct behaviour for the minimum value of int64_t? That
> would be undefined behaviour in C, but well-defined under ACLE.
> 
> Thanks,
> James
> 

Hi. Thanks for the review.

For the minimum value of int64_t it behaves as the ACLE specifies:
"The negative of the minimum (signed) value is itself."

Thanks,
Vlad


* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-08-01 12:14   ` Vlad Lazar
@ 2018-08-01 17:35     ` James Greenhalgh
  2018-08-08 16:38       ` Vlad Lazar
  0 siblings, 1 reply; 10+ messages in thread
From: James Greenhalgh @ 2018-08-01 17:35 UTC (permalink / raw)
  To: Vlad Lazar; +Cc: gcc-patches, nd, Richard Earnshaw

On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
> On 31/07/18 22:48, James Greenhalgh wrote:
> > On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
> >> Hi,
> >>
> >> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
> >> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
> >>
> >> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
> >>
> >> OK for trunk?
> >>
> >> +__extension__ extern __inline int64_t
> >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >> +vnegd_s64 (int64_t __a)
> >> +{
> >> +  return -__a;
> >> +}
> > 
> > Does this give the correct behaviour for the minimum value of int64_t? That
> > would be undefined behaviour in C, but well-defined under ACLE.
> > 
> > Thanks,
> > James
> > 
> 
> Hi. Thanks for the review.
> 
> For the minimum value of int64_t it behaves as the ACLE specifies:
> "The negative of the minimum (signed) value is itself."

What should happen in this testcase? The spoiler is below, but try to work out
what should happen and what goes wrong with your implementation.

  int foo (int64_t x)
  {
    if (x < (int64_t) 0)
      return vnegd_s64(x) < (int64_t) 0;
    else
      return 0;
  }
  
  
  int bar (void)
  {
    return foo (INT64_MIN);
  }
 
Thanks,
James


-----

<spoiler!>




INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
vnegd_s64(INT64_MIN) is identity, so the return value should be
INT64_MIN < 0; i.e. True.

This isn't what the compiler thinks... The compiler makes use of the fact
that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
as a special case. The if statement gives you a range reduction to [-INF, -1],
negating that gives you a range [1, INF], and [1, INF] is never less than 0,
so the compiler folds the function to return false. We have a mismatch in
semantics.

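For reference, a minimal stand-alone sketch of the same fold outside arm_neon.h
(hypothetical helper name, not part of the patch); with optimisation GCC is
entitled to assume the signed negation never overflows:

  #include <stdint.h>

  int
  folds_to_false (int64_t x)
  {
    if (x < (int64_t) 0)
      /* -x is undefined behaviour when x == INT64_MIN, so the compiler
         may assume the result lies in [1, INT64_MAX] and fold the
         comparison below to 0.  */
      return -x < (int64_t) 0;
    else
      return 0;
  }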

* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-08-01 17:35     ` James Greenhalgh
@ 2018-08-08 16:38       ` Vlad Lazar
  2018-08-28  8:59         ` Vlad Lazar
  0 siblings, 1 reply; 10+ messages in thread
From: Vlad Lazar @ 2018-08-08 16:38 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd, Richard Earnshaw

On 01/08/18 18:35, James Greenhalgh wrote:
> On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
>> On 31/07/18 22:48, James Greenhalgh wrote:
>>> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
>>>> Hi,
>>>>
>>>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
>>>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
>>>>
>>>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>>>>
>>>> OK for trunk?
>>>>
>>>> +__extension__ extern __inline int64_t
>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>> +vnegd_s64 (int64_t __a)
>>>> +{
>>>> +  return -__a;
>>>> +}
>>>
>>> Does this give the correct behaviour for the minimum value of int64_t? That
>>> would be undefined behaviour in C, but well-defined under ACLE.
>>>
>>> Thanks,
>>> James
>>>
>>
>> Hi. Thanks for the review.
>>
>> For the minimum value of int64_t it behaves as the ACLE specifies:
>> "The negative of the minimum (signed) value is itself."
> 
> What should happen in this testcase? The spoiler is below, but try to work out
> what should happen and what goes wrong with your implementation.
> 
>    int foo (int64_t x)
>    {
>      if (x < (int64_t) 0)
>        return vnegd_s64(x) < (int64_t) 0;
>      else
>        return 0;
>    }
>    
>    
>    int bar (void)
>    {
>      return foo (INT64_MIN);
>    }
>   
> Thanks,
> James
> 
> 
> -----
> 
> <spoiler!>
> 
> 
> 
> 
> INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
> vnegd_s64(INT64_MIN) is identity, so the return value should be
> INT64_MIN < 0; i.e. True.
> 
> This isn't what the compiler thinks... The compiler makes use of the fact
> that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
> as a special case. The if statement gives you a range reduction to [-INF, -1],
> negating that gives you a range [1, INF], and [1, INF] is never less than 0,
> so the compiler folds the function to return false. We have a mismatch in
> semantics
> 
I see your point now. I have updated the vnegd_s64 intrinsic to convert to
unsigned before negating. This means that if the predicted range of x is
[INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
~[INT64_MIN + 1, y], which seems to resolve the issue. I've also added testcases
which reflect the issue you've pointed out. Note that I've changed the vabsd_s64
intrinsic in order to avoid moves between integer and vector registers.
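For reference, a minimal stand-alone sketch of the trick (hypothetical helper
name, not the patch itself), assuming GCC's documented modulo-2^64 behaviour
when the unsigned result is converted back to int64_t:

  #include <stdint.h>

  int64_t
  neg_no_overflow (int64_t a)
  {
    /* The negation is done in uint64_t, which wraps modulo 2^64 and
       involves no undefined behaviour, so INT64_MIN maps back to
       INT64_MIN and the value range pass cannot exclude it.  */
    return - (uint64_t) a;
  }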

See the updated patch below. Ok for trunk?

---

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2d18400040f031dfcdaf60269ad484647804e1be..fc734e1aa9e93c171c0670164e5a3a54209905d3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11822,6 +11822,18 @@ vabsq_s64 (int64x2_t __a)
    return __builtin_aarch64_absv2di (__a);
  }
  
+/* Try to avoid moving between integer and vector registers.
+   For why the cast to unsigned is needed check the vnegd_s64 intrinsic.
+   There is a testcase related to this issue:
+   gcc.target/aarch64/vabsd_s64.c.  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vabsd_s64 (int64_t __a)
+{
+  return __a < 0 ? - (uint64_t) __a : __a;
+}
+
  /* vadd */
  
  __extension__ extern __inline int64_t
@@ -22907,6 +22919,25 @@ vneg_s64 (int64x1_t __a)
    return -__a;
  }
  
+/* According to the ACLE, the negative of the minimum (signed)
+   value is itself.  This leads to a semantics mismatch, as this is
+   undefined behaviour in C.  The value range predictor is not
+   aware that the negation of a negative number can still be negative
+   and it may try to fold the expression.  See the test in
+   gcc.target/aarch64/vnegd_s64.c for an example.
+
+   The cast below tricks the value range predictor to include
+   INT64_MIN in the range it computes.  So for x in the range
+   [INT64_MIN, y] the range prediction after vnegd_s64 (x) will
+   be ~[INT64_MIN + 1, y].  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return - (uint64_t) __a;
+}
+
  __extension__ extern __inline float32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vnegq_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index ea29066e369b967d0781d31c8a5208bda9e4f685..d943989768dd8c9aa87d9dcb899e199029ef3f8b 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -627,6 +627,14 @@ test_vqabss_s32 (int32_t a)
    return vqabss_s32 (a);
  }
  
+/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
+
+int64_t
+test_vnegd_s64 (int64_t a)
+{
+  return vnegd_s64 (a);
+}
+
  /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
  
  int8_t
diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf4e7ae4679d5b1896f35e3bf3135b0bd42befde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
@@ -0,0 +1,39 @@
+/* Test the vabsd_s64 intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+#define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"       \
+           : "=w"(V1)                                           \
+           : "w"(V1)                                            \
+           : /* No clobbers */);
+
+#define RUN_TEST(test, answ)   \
+{                                      \
+  force_simd (test);                   \
+  force_simd (answ);                   \
+  int64_t res = vabsd_s64 (test);      \
+  force_simd (res);                    \
+  if (res != answ)                     \
+    abort ();                          \
+}
+
+int64_t input[] = {INT64_MAX, 10, 0, -10, INT64_MIN + 1, INT64_MIN};
+int64_t expected[] = {INT64_MAX, 10, 0, 10, INT64_MAX, INT64_MIN};
+
+int main (void)
+{
+  RUN_TEST (input[0], expected[0]);
+  RUN_TEST (input[1], expected[1]);
+  RUN_TEST (input[2], expected[2]);
+  RUN_TEST (input[3], expected[3]);
+  RUN_TEST (input[4], expected[4]);
+  RUN_TEST (input[5], expected[5]);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
new file mode 100644
index 0000000000000000000000000000000000000000..a0f88ee12c3ea0269041213899a68f6677d80d42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
@@ -0,0 +1,34 @@
+/* Check that the compiler does not optimise the vabsd_s64 call out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that the absolute value of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -fno-inline -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+bar (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vabsd_s64 (x) < (int64_t) 0;
+  else
+	return -1;
+}
+
+int
+main (void)
+{
+  int ans = 1;
+  int res_abs = bar (INT64_MIN);
+
+  if (res_abs != ans)
+    abort ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_s.c b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
index 911054053eaefb5a67b48578fac9e2ba428c3ab2..f708e97c34570eb75595915c040e5175562c2bea 100644
--- a/gcc/testsuite/gcc.target/aarch64/vneg_s.c
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
@@ -75,6 +75,18 @@ extern void abort (void);
        }									\
    }
  
+#define RUN_TEST_SCALAR(test_val, answ_val, a, b)     \
+  {                                                   \
+    int64_t res;                                      \
+    INHIB_OPTIMIZATION;                               \
+    a = test_val;                                     \
+    b = answ_val;                                     \
+    force_simd (b);                                   \
+    force_simd (a);                                   \
+    res = vnegd_s64 (a);                              \
+    force_simd (res);                                 \
+  }
+
  int
  test_vneg_s8 ()
  {
@@ -179,6 +191,25 @@ test_vneg_s64 ()
  
  /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
  
+int
+test_vnegd_s64 ()
+{
+  int64_t a, b;
+
+  RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
+  RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
+  RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
+  RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
+  RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
+  RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
+  RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
+  RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+
  int
  test_vnegq_s8 ()
  {
@@ -283,6 +314,9 @@ main (int argc, char **argv)
    if (test_vneg_s64 ())
      abort ();
  
+  if (test_vnegd_s64 ())
+    abort ();
+
    if (test_vnegq_s8 ())
      abort ();
  
diff --git a/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
new file mode 100644
index 0000000000000000000000000000000000000000..73d478ff49daf758e233958d134de8fb864090c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
@@ -0,0 +1,36 @@
+/* Check that the compiler does not optimise the negation out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that the negative of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+foo (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vnegd_s64 (x) < (int64_t) 0;
+  else
+    return -1;
+}
+
+/* { dg-final { scan-assembler-times {neg\tx[0-9]+, x[0-9]+} 1 } } */
+
+int
+main (void)
+{
+  int ans = 1;
+  int res = foo (INT64_MIN);
+
+  if (res != ans)
+    abort ();
+
+  return 0;
+}
+


* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-08-08 16:38       ` Vlad Lazar
@ 2018-08-28  8:59         ` Vlad Lazar
  2018-08-28 21:59           ` James Greenhalgh
  0 siblings, 1 reply; 10+ messages in thread
From: Vlad Lazar @ 2018-08-28  8:59 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

Gentle ping.

On 08/08/18 17:38, Vlad Lazar wrote:
> On 01/08/18 18:35, James Greenhalgh wrote:
>> On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
>>> On 31/07/18 22:48, James Greenhalgh wrote:
>>>> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
>>>>> Hi,
>>>>>
>>>>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
>>>>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
>>>>>
>>>>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>>>>>
>>>>> OK for trunk?
>>>>>
>>>>> +__extension__ extern __inline int64_t
>>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>>> +vnegd_s64 (int64_t __a)
>>>>> +{
>>>>> +  return -__a;
>>>>> +}
>>>>
>>>> Does this give the correct behaviour for the minimum value of int64_t? That
>>>> would be undefined behaviour in C, but well-defined under ACLE.
>>>>
>>>> Thanks,
>>>> James
>>>>
>>>
>>> Hi. Thanks for the review.
>>>
>>> For the minimum value of int64_t it behaves as the ACLE specifies:
>>> "The negative of the minimum (signed) value is itself."
>>
>> What should happen in this testcase? The spoiler is below, but try to work out
>> what should happen and what goes wrong with your implementation.
>>
>>    int foo (int64_t x)
>>    {
>>      if (x < (int64_t) 0)
>>        return vnegd_s64(x) < (int64_t) 0;
>>      else
>>        return 0;
>>    }
>>    int bar (void)
>>    {
>>      return foo (INT64_MIN);
>>    }
>> Thanks,
>> James
>>
>>
>> -----
>>
>> <spoiler!>
>>
>>
>>
>>
>> INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
>> vnegd_s64(INT64_MIN) is identity, so the return value should be
>> INT64_MIN < 0; i.e. True.
>>
>> This isn't what the compiler thinks... The compiler makes use of the fact
>> that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
>> as a special case. The if statement gives you a range reduction to [-INF, -1],
>> negating that gives you a range [1, INF], and [1, INF] is never less than 0,
>> so the compiler folds the function to return false. We have a mismatch in
>> semantics
>>
> I see your point now. I have updated the vnegd_s64 intrinsic to convert to
> unsigned before negating. This means that if the predicted range of x is
> [INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
> ~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
> which reflect the issue you've pointed out. Note that I've changed the vabsd_s64
> intrinsic in order to avoid moves between integer and vector registers.
>
> See the updated patch below. Ok for trunk?
>
> ---
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 2d18400040f031dfcdaf60269ad484647804e1be..fc734e1aa9e93c171c0670164e5a3a54209905d3 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -11822,6 +11822,18 @@ vabsq_s64 (int64x2_t __a)
>     return __builtin_aarch64_absv2di (__a);
>   }
>
> +/* Try to avoid moving between integer and vector registers.
> +   For why the cast to unsigned is needed check the vnegd_s64 intrinsic.
> +   There is a testcase related to this issue:
> +   gcc.target/aarch64/vabsd_s64.c.  */
> +
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vabsd_s64 (int64_t __a)
> +{
> +  return __a < 0 ? - (uint64_t) __a : __a;
> +}
> +
>   /* vadd */
>
>   __extension__ extern __inline int64_t
> @@ -22907,6 +22919,25 @@ vneg_s64 (int64x1_t __a)
>     return -__a;
>   }
>
> +/* According to the ACLE, the negative of the minimum (signed)
> +   value is itself.  This leads to a semantics mismatch, as this is
> +   undefined behaviour in C.  The value range predictor is not
> +   aware that the negation of a negative number can still be negative
> +   and it may try to fold the expression.  See the test in
> +   gcc.target/aarch64/vnegd_s64.c for an example.
> +
> +   The cast below tricks the value range predictor to include
> +   INT64_MIN in the range it computes.  So for x in the range
> +   [INT64_MIN, y] the range prediction after vnegd_s64 (x) will
> +   be ~[INT64_MIN + 1, y].  */
> +
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vnegd_s64 (int64_t __a)
> +{
> +  return - (uint64_t) __a;
> +}
> +
>   __extension__ extern __inline float32x4_t
>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>   vnegq_f32 (float32x4_t __a)
> diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> index ea29066e369b967d0781d31c8a5208bda9e4f685..d943989768dd8c9aa87d9dcb899e199029ef3f8b 100644
> --- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> +++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> @@ -627,6 +627,14 @@ test_vqabss_s32 (int32_t a)
>     return vqabss_s32 (a);
>   }
>
> +/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
> +
> +int64_t
> +test_vnegd_s64 (int64_t a)
> +{
> +  return vnegd_s64 (a);
> +}
> +
>   /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
>
>   int8_t
> diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..cf4e7ae4679d5b1896f35e3bf3135b0bd42befde
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
> @@ -0,0 +1,39 @@
> +/* Test the vabsd_s64 intrinsic.  */
> +
> +/* { dg-do run } */
> +/* { dg-options "--save-temps -O2" } */
> +
> +#include <arm_neon.h>
> +#include <limits.h>
> +
> +extern void abort (void);
> +
> +#define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"       \
> +           : "=w"(V1)                                           \
> +           : "w"(V1)                                            \
> +           : /* No clobbers */);
> +
> +#define RUN_TEST(test, answ)   \
> +{                                      \
> +  force_simd (test);                   \
> +  force_simd (answ);                   \
> +  int64_t res = vabsd_s64 (test);      \
> +  force_simd (res);                    \
> +  if (res != answ)                     \
> +    abort ();                          \
> +}
> +
> +int64_t input[] = {INT64_MAX, 10, 0, -10, INT64_MIN + 1, INT64_MIN};
> +int64_t expected[] = {INT64_MAX, 10, 0, 10, INT64_MAX, INT64_MIN};
> +
> +int main (void)
> +{
> +  RUN_TEST (input[0], expected[0]);
> +  RUN_TEST (input[1], expected[1]);
> +  RUN_TEST (input[2], expected[2]);
> +  RUN_TEST (input[3], expected[3]);
> +  RUN_TEST (input[4], expected[4]);
> +  RUN_TEST (input[5], expected[5]);
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a0f88ee12c3ea0269041213899a68f6677d80d42
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
> @@ -0,0 +1,34 @@
> +/* Check that the compiler does not optimise the vabsd_s64 call out.
> +   We need to check for this because there is a mismatch in semantics
> +   between the ACLE, which states that the absolute value of the minimum
> +   (signed) value is itself, and C, where this is undefined behaviour.  */
> +
> +/* { dg-do run } */
> +/* { dg-options "--save-temps -fno-inline -O2" } */
> +
> +#include <arm_neon.h>
> +#include <limits.h>
> +
> +extern void abort (void);
> +
> +int
> +bar (int64_t x)
> +{
> +  if (x < (int64_t) 0)
> +    return vabsd_s64 (x) < (int64_t) 0;
> +  else
> +    return -1;
> +}
> +
> +int
> +main (void)
> +{
> +  int ans = 1;
> +  int res_abs = bar (INT64_MIN);
> +
> +  if (res_abs != ans)
> +    abort ();
> +
> +  return 0;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_s.c b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
> index 911054053eaefb5a67b48578fac9e2ba428c3ab2..f708e97c34570eb75595915c040e5175562c2bea 100644
> --- a/gcc/testsuite/gcc.target/aarch64/vneg_s.c
> +++ b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
> @@ -75,6 +75,18 @@ extern void abort (void);
>         }                                    \
>     }
>
> +#define RUN_TEST_SCALAR(test_val, answ_val, a, b)     \
> +  {                                                   \
> +    int64_t res;                                      \
> +    INHIB_OPTIMIZATION;                               \
> +    a = test_val;                                     \
> +    b = answ_val;                                     \
> +    force_simd (b);                                   \
> +    force_simd (a);                                   \
> +    res = vnegd_s64 (a);                              \
> +    force_simd (res);                                 \
> +  }
> +
>   int
>   test_vneg_s8 ()
>   {
> @@ -179,6 +191,25 @@ test_vneg_s64 ()
>
>   /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
>
> +int
> +test_vnegd_s64 ()
> +{
> +  int64_t a, b;
> +
> +  RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
> +  RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
> +  RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
> +  RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
> +  RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
> +  RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
> +  RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
> +  RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
> +
>   int
>   test_vnegq_s8 ()
>   {
> @@ -283,6 +314,9 @@ main (int argc, char **argv)
>     if (test_vneg_s64 ())
>       abort ();
>
> +  if (test_vnegd_s64 ())
> +    abort ();
> +
>     if (test_vnegq_s8 ())
>       abort ();
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..73d478ff49daf758e233958d134de8fb864090c4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
> @@ -0,0 +1,36 @@
> +/* Check that the compiler does not optimise the negation out.
> +   We need to check for this because there is a mismatch in semantics
> +   between the ACLE, which states that the negative of the minimum
> +   (signed) value is itself, and C, where this is undefined behaviour.  */
> +
> +/* { dg-do run } */
> +/* { dg-options "--save-temps -O2" } */
> +
> +#include <arm_neon.h>
> +#include <limits.h>
> +
> +extern void abort (void);
> +
> +int
> +foo (int64_t x)
> +{
> +  if (x < (int64_t) 0)
> +    return vnegd_s64 (x) < (int64_t) 0;
> +  else
> +    return -1;
> +}
> +
> +/* { dg-final { scan-assembler-times {neg\tx[0-9]+, x[0-9]+} 1 } } */
> +
> +int
> +main (void)
> +{
> +  int ans = 1;
> +  int res = foo (INT64_MIN);
> +
> +  if (res != ans)
> +    abort ();
> +
> +  return 0;
> +}
> +


* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-08-28  8:59         ` Vlad Lazar
@ 2018-08-28 21:59           ` James Greenhalgh
  2018-08-31 15:07             ` Vlad Lazar
  0 siblings, 1 reply; 10+ messages in thread
From: James Greenhalgh @ 2018-08-28 21:59 UTC (permalink / raw)
  To: Vlad Lazar; +Cc: gcc-patches, nd

On Tue, Aug 28, 2018 at 03:59:25AM -0500, Vlad Lazar wrote:
> Gentle ping.
> 
> On 08/08/18 17:38, Vlad Lazar wrote:
> > On 01/08/18 18:35, James Greenhalgh wrote:
> >> On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
> >>> On 31/07/18 22:48, James Greenhalgh wrote:
> >>>> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
> >>>>> Hi,
> >>>>>
> >>>>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
> >>>>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
> >>>>>
> >>>>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
> >>>>>
> >>>>> OK for trunk?
> >>>>>
> >>>>> +__extension__ extern __inline int64_t
> >>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >>>>> +vnegd_s64 (int64_t __a)
> >>>>> +{
> >>>>> +  return -__a;
> >>>>> +}
> >>>>
> >>>> Does this give the correct behaviour for the minimum value of int64_t? That
> >>>> would be undefined behaviour in C, but well-defined under ACLE.
> >>>>
> >>>> Thanks,
> >>>> James
> >>>>
> >>>
> >>> Hi. Thanks for the review.
> >>>
> >>> For the minimum value of int64_t it behaves as the ACLE specifies:
> >>> "The negative of the minimum (signed) value is itself."
> >>
> >> What should happen in this testcase? The spoiler is below, but try to work out
> >> what should happen and what goes wrong with your implementation.
> >>
> >>    int foo (int64_t x)
> >>    {
> >>      if (x < (int64_t) 0)
> >>        return vnegd_s64(x) < (int64_t) 0;
> >>      else
> >>        return 0;
> >>    }
> >>    int bar (void)
> >>    {
> >>      return foo (INT64_MIN);
> >>    }
> >> Thanks,
> >> James
> >>
> >>
> >> -----
> >>
> >> <spoiler!>
> >>
> >>
> >>
> >>
> >> INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
> >> vnegd_s64(INT64_MIN) is identity, so the return value should be
> >> INT64_MIN < 0; i.e. True.
> >>
> >> This isn't what the compiler thinks... The compiler makes use of the fact
> >> that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
> >> as a special case. The if statement gives you a range reduction to [-INF, -1],
> >> negating that gives you a range [1, INF], and [1, INF] is never less than 0,
> >> so the compiler folds the function to return false. We have a mismatch in
> >> semantics
> >>
> > I see your point now. I have updated the vnegd_s64 intrinsic to convert to
> > unsigned before negating. This means that if the predicted range of x is
> > [INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
> > ~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
> > which reflect the issue you've pointed out. Note that I've changed the vabsd_s64
> > intrinsic in order to avoid moves between integer and vector registers.

I think from my reading of the standard that this is OK, but I may be rusty
and missing a corner case.

OK for trunk.

Thanks,
James


* Re: [PATCH][AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64
  2018-08-28 21:59           ` James Greenhalgh
@ 2018-08-31 15:07             ` Vlad Lazar
  0 siblings, 0 replies; 10+ messages in thread
From: Vlad Lazar @ 2018-08-31 15:07 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

[-- Attachment #1: Type: text/plain, Size: 3318 bytes --]

On 28/08/18 22:58, James Greenhalgh wrote:
> On Tue, Aug 28, 2018 at 03:59:25AM -0500, Vlad Lazar wrote:
>> Gentle ping.
>>
>> On 08/08/18 17:38, Vlad Lazar wrote:
>>> On 01/08/18 18:35, James Greenhalgh wrote:
>>>> On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
>>>>> On 31/07/18 22:48, James Greenhalgh wrote:
>>>>>> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
>>>>>>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
>>>>>>>
>>>>>>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>>>>>>>
>>>>>>> OK for trunk?
>>>>>>>
>>>>>>> +__extension__ extern __inline int64_t
>>>>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>>>>> +vnegd_s64 (int64_t __a)
>>>>>>> +{
>>>>>>> +  return -__a;
>>>>>>> +}
>>>>>>
>>>>>> Does this give the correct behaviour for the minimum value of int64_t? That
>>>>>> would be undefined behaviour in C, but well-defined under ACLE.
>>>>>>
>>>>>> Thanks,
>>>>>> James
>>>>>>
>>>>>
>>>>> Hi. Thanks for the review.
>>>>>
>>>>> For the minimum value of int64_t it behaves as the ACLE specifies:
>>>>> "The negative of the minimum (signed) value is itself."
>>>>
>>>> What should happen in this testcase? The spoiler is below, but try to work out
>>>> what should happen and what goes wrong with your implementation.
>>>>
>>>>     int foo (int64_t x)
>>>>     {
>>>>       if (x < (int64_t) 0)
>>>>         return vnegd_s64(x) < (int64_t) 0;
>>>>       else
>>>>         return 0;
>>>>     }
>>>>     int bar (void)
>>>>     {
>>>>       return foo (INT64_MIN);
>>>>     }
>>>> Thanks,
>>>> James
>>>>
>>>>
>>>> -----
>>>>
>>>> <spoiler!>
>>>>
>>>>
>>>>
>>>>
>>>> INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
>>>> vnegd_s64(INT64_MIN) is identity, so the return value should be
>>>> INT64_MIN < 0; i.e. True.
>>>>
>>>> This isn't what the compiler thinks... The compiler makes use of the fact
>>>> that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
>>>> as a special case. The if statement gives you a range reduction to [-INF, -1],
>>>> negating that gives you a range [1, INF], and [1, INF] is never less than 0,
>>>> so the compiler folds the function to return false. We have a mismatch in
>>>> semantics
>>>>
>>> I see your point now. I have updated the vnegd_s64 intrinsic to convert to
>>> unsigned before negating. This means that if the predicted range of x is
>>> [INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
>>> ~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
>>> which reflect the issue you've pointed out. Note that I've change the vabsd_s64
>>> intrinsic in order to avoid moves between integer and vector registers.
>
> I think from my reading of the standard that this is OK, but I may be rusty
> and missing a corner case.
>
> OK for trunk.
>
> Thanks,
> James
>
Committed with an obvious change to the testsuite/gcc.target/aarch64/vneg_s.c testcase:
I merged two scan-assembler directives which were searching for the same pattern.
See the patch below.

Thanks,
Vlad

[-- Attachment #2: vabsd_vnegd.diff --]
[-- Type: text/x-patch, Size: 8218 bytes --]

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 264018)
+++ ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2018-08-31  Vlad Lazar  <vlad.lazar@arm.com>
+
+	* config/aarch64/arm_neon.h (vabsd_s64): New.
+	(vnegd_s64): Likewise.
+
 2018-08-31  Martin Jambor  <mjambor@suse.cz>
 
 	* ipa-cp.c (estimate_local_effects): Replace wrong MAX with MIN.
Index: config/aarch64/arm_neon.h
===================================================================
--- config/aarch64/arm_neon.h	(revision 264018)
+++ config/aarch64/arm_neon.h	(working copy)
@@ -11822,6 +11822,18 @@
   return __builtin_aarch64_absv2di (__a);
 }
 
+/* Try to avoid moving between integer and vector registers.
+   For why the cast to unsigned is needed check the vnegd_s64 intrinsic.
+   There is a testcase related to this issue:
+   gcc.target/aarch64/vabsd_s64.c.  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vabsd_s64 (int64_t __a)
+{
+  return __a < 0 ? - (uint64_t) __a : __a;
+}
+
 /* vadd */
 
 __extension__ extern __inline int64_t
@@ -22907,6 +22919,25 @@
   return -__a;
 }
 
+/* According to the ACLE, the negative of the minimum (signed)
+   value is itself.  This leads to a semantics mismatch, as this is
+   undefined behaviour in C.  The value range predictor is not
+   aware that the negation of a negative number can still be negative
+   and it may try to fold the expression.  See the test in
+   gcc.target/aarch64/vnegd_s64.c for an example.
+
+   The cast below tricks the value range predictor to include
+   INT64_MIN in the range it computes.  So for x in the range
+   [INT64_MIN, y] the range prediction after vnegd_s64 (x) will
+   be ~[INT64_MIN + 1, y].  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return - (uint64_t) __a;
+}
+
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vnegq_f32 (float32x4_t __a)
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 264018)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,12 @@
+2018-08-31  Vlad Lazar  <vlad.lazar@arm.com>
+
+	* gcc.target/aarch64/scalar_intrinsics.c (test_vnegd_s64): New.
+	* gcc.target/aarch64/vneg_s.c (RUN_TEST_SCALAR): New.
+	(test_vnegd_s64): Likewise.
+	* gcc.target/aarch64/vnegd_s64.c: New.
+	* gcc.target/aarch64/vabsd_s64.c: New.
+	* gcc.target/aarch64/vabs_intrinsic_3.c: New.
+
 2018-08-31  Nathan Sidwell  <nathan@acm.org>
 
 	PR c++/87155
Index: testsuite/gcc.target/aarch64/scalar_intrinsics.c
===================================================================
--- testsuite/gcc.target/aarch64/scalar_intrinsics.c	(revision 264018)
+++ testsuite/gcc.target/aarch64/scalar_intrinsics.c	(working copy)
@@ -627,6 +627,14 @@
   return vqabss_s32 (a);
 }
 
+/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
+
+int64_t
+test_vnegd_s64 (int64_t a)
+{
+  return vnegd_s64 (a);
+}
+
 /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
 
 int8_t
Index: testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
===================================================================
--- testsuite/gcc.target/aarch64/vabs_intrinsic_3.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vabs_intrinsic_3.c	(working copy)
@@ -0,0 +1,39 @@
+/* Test the vabsd_s64 intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+#define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"       \
+           : "=w"(V1)                                           \
+           : "w"(V1)                                            \
+           : /* No clobbers */);
+
+#define RUN_TEST(test, answ)   \
+{                                      \
+  force_simd (test);                   \
+  force_simd (answ);                   \
+  int64_t res = vabsd_s64 (test);      \
+  force_simd (res);                    \
+  if (res != answ)                     \
+    abort ();                          \
+}
+
+int64_t input[] = {INT64_MAX, 10, 0, -10, INT64_MIN + 1, INT64_MIN};
+int64_t expected[] = {INT64_MAX, 10, 0, 10, INT64_MAX, INT64_MIN};
+
+int main (void)
+{
+  RUN_TEST (input[0], expected[0]);
+  RUN_TEST (input[1], expected[1]);
+  RUN_TEST (input[2], expected[2]);
+  RUN_TEST (input[3], expected[3]);
+  RUN_TEST (input[4], expected[4]);
+  RUN_TEST (input[5], expected[5]);
+
+  return 0;
+}
Index: testsuite/gcc.target/aarch64/vabsd_s64.c
===================================================================
--- testsuite/gcc.target/aarch64/vabsd_s64.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vabsd_s64.c	(working copy)
@@ -0,0 +1,34 @@
+/* Check that the compiler does not optimise the vabsd_s64 call out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that the absolute value of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -fno-inline -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+bar (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vabsd_s64 (x) < (int64_t) 0;
+  else
+	return -1;
+}
+
+int
+main (void)
+{
+  int ans = 1;
+  int res_abs = bar (INT64_MIN);
+
+  if (res_abs != ans)
+    abort ();
+
+  return 0;
+}
+
Index: testsuite/gcc.target/aarch64/vneg_s.c
===================================================================
--- testsuite/gcc.target/aarch64/vneg_s.c	(revision 264018)
+++ testsuite/gcc.target/aarch64/vneg_s.c	(working copy)
@@ -75,6 +75,18 @@
       }									\
   }
 
+#define RUN_TEST_SCALAR(test_val, answ_val, a, b)     \
+  {                                                   \
+    int64_t res;                                      \
+    INHIB_OPTIMIZATION;                               \
+    a = test_val;                                     \
+    b = answ_val;                                     \
+    force_simd (b);                                   \
+    force_simd (a);                                   \
+    res = vnegd_s64 (a);                              \
+    force_simd (res);                                 \
+  }
+
 int
 test_vneg_s8 ()
 {
@@ -177,8 +189,25 @@
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+int
+test_vnegd_s64 ()
+{
+  int64_t a, b;
 
+  RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
+  RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
+  RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
+  RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
+  RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
+  RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
+  RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
+  RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 16 } } */
+
 int
 test_vnegq_s8 ()
 {
@@ -283,6 +312,9 @@
   if (test_vneg_s64 ())
     abort ();
 
+  if (test_vnegd_s64 ())
+    abort ();
+
   if (test_vnegq_s8 ())
     abort ();
 
Index: testsuite/gcc.target/aarch64/vnegd_s64.c
===================================================================
--- testsuite/gcc.target/aarch64/vnegd_s64.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vnegd_s64.c	(working copy)
@@ -0,0 +1,36 @@
+/* Check that the compiler does not optimise the negation out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that the negative of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+foo (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vnegd_s64 (x) < (int64_t) 0;
+  else
+    return -1;
+}
+
+/* { dg-final { scan-assembler-times {neg\tx[0-9]+, x[0-9]+} 1 } } */
+
+int
+main (void)
+{
+  int ans = 1;
+  int res = foo (INT64_MIN);
+
+  if (res != ans)
+    abort ();
+
+  return 0;
+}
+
