RE: [PATCH][AArch64] Use conditional negate for abs expansion

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* RE: [PATCH][AArch64] Use conditional negate for abs expansion
@ 2015-04-27 13:42 Wilco Dijkstra
  2015-04-27 16:26 ` James Greenhalgh
  0 siblings, 1 reply; 11+ messages in thread
From: Wilco Dijkstra @ 2015-04-27 13:42 UTC (permalink / raw)
  To: 'GCC Patches'

ping

> -----Original Message-----
> From: Wilco Dijkstra [mailto:wdijkstr@arm.com]
> Sent: 03 March 2015 16:19
> To: GCC Patches
> Subject: [PATCH][AArch64] Use conditional negate for abs expansion
> 
> Expand abs into a compare and conditional negate. This is the most obvious expansion, enables
> merging of the comparison into ALU instructions and is faster on all implementations.
> Bootstrapped & regression tested.
> 
> int f(int x) { return abs (x + 1); }
> 
> Before:
>         add     w0, w0, 1
>         sxtw    x0, w0
>         eor     x1, x0, x0, asr 63
>         sub     x1, x1, x0, asr 63
>         mov     x0, x1
>         ret
> 
> After:
>         adds    w0, w0, 1
>         csneg   w0, w0, w0, pl
>         ret
> 
> ChangeLog:
> 
> 2015-03-03  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* gcc/config/aarch64/aarch64.md (absdi2): optimize abs expansion.
> 	(csneg3<mode>_insn): enable expansion of pattern.
> 	* gcc/testsuite/gcc.target/aarch64/abs_1.c (abs64): update test
> 	for new abs expansion.  (abs64_in_dreg): likewise.
> 
> ---
>  gcc/config/aarch64/aarch64.md            | 33 +++++++-------------------------
>  gcc/testsuite/gcc.target/aarch64/abs_1.c |  5 ++---
>  2 files changed, 9 insertions(+), 29 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 1f4169e..46b7a63 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -2172,35 +2172,16 @@
>    [(set_attr "type" "alu_ext")]
>  )
> 
> -(define_insn_and_split "absdi2"
> -  [(set (match_operand:DI 0 "register_operand" "=&r,w")
> -	(abs:DI (match_operand:DI 1 "register_operand" "r,w")))]
> +(define_expand "abs<mode>2"
> +  [(match_operand:GPI 0 "register_operand" "")
> +   (match_operand:GPI 1 "register_operand" "")]
>    ""
> -  "@
> -   #
> -   abs\\t%d0, %d1"
> -  "reload_completed
> -   && GP_REGNUM_P (REGNO (operands[0]))
> -   && GP_REGNUM_P (REGNO (operands[1]))"
> -  [(const_int 0)]
>    {
> -    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
> -			    gen_rtx_XOR (DImode,
> -					 gen_rtx_ASHIFTRT (DImode,
> -							   operands[1],
> -							   GEN_INT (63)),
> -					 operands[1])));
> -    emit_insn (gen_rtx_SET (VOIDmode,
> -			    operands[0],
> -			    gen_rtx_MINUS (DImode,
> -					   operands[0],
> -					   gen_rtx_ASHIFTRT (DImode,
> -							     operands[1],
> -							     GEN_INT (63)))));
> +    rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
> +    rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx);
> +    emit_insn (gen_csneg3<mode>_insn (operands[0], x, operands[1], operands[1]));
>      DONE;
>    }
> -  [(set_attr "type" "alu_sreg")
> -   (set_attr "simd" "no,yes")]
>  )
> 
>  (define_insn "neg<mode>2"
> @@ -2879,7 +2860,7 @@
>    [(set_attr "type" "csel")]
>  )
> 
> -(define_insn "*csneg3<mode>_insn"
> +(define_insn "csneg3<mode>_insn"
>    [(set (match_operand:GPI 0 "register_operand" "=r")
>          (if_then_else:GPI
>  	  (match_operand 1 "aarch64_comparison_operation" "")
> diff --git a/gcc/testsuite/gcc.target/aarch64/abs_1.c
> b/gcc/testsuite/gcc.target/aarch64/abs_1.c
> index 938bc84..11f1095 100644
> --- a/gcc/testsuite/gcc.target/aarch64/abs_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/abs_1.c
> @@ -7,15 +7,14 @@ extern void abort (void);
>  long long
>  abs64 (long long a)
>  {
> -  /* { dg-final { scan-assembler "eor\t" } } */
> -  /* { dg-final { scan-assembler "sub\t" } } */
> +  /* { dg-final { scan-assembler "csneg\t" } } */
>    return llabs (a);
>  }
> 
>  long long
>  abs64_in_dreg (long long a)
>  {
> -  /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */
> +  /* { dg-final { scan-assembler "csneg\t" } } */
>    register long long x asm ("d8") = a;
>    register long long y asm ("d9");
>    asm volatile ("" : : "w" (x));
> --
> 1.9.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-04-27 13:42 [PATCH][AArch64] Use conditional negate for abs expansion Wilco Dijkstra
@ 2015-04-27 16:26 ` James Greenhalgh
  2015-04-27 16:57   ` Wilco Dijkstra
  0 siblings, 1 reply; 11+ messages in thread
From: James Greenhalgh @ 2015-04-27 16:26 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: 'GCC Patches'

On Mon, Apr 27, 2015 at 02:42:36PM +0100, Wilco Dijkstra wrote:
> > -----Original Message-----
> > From: Wilco Dijkstra [mailto:wdijkstr@arm.com]
> > Sent: 03 March 2015 16:19
> > To: GCC Patches
> > Subject: [PATCH][AArch64] Use conditional negate for abs expansion
> > 
> > Expand abs into a compare and conditional negate. This is the most obvious expansion, enables
> > merging of the comparison into ALU instructions and is faster on all implementations.
> > Bootstrapped & regression tested.
> > 
> > int f(int x) { return abs (x + 1); }
> > 
> > Before:
> >         add     w0, w0, 1
> >         sxtw    x0, w0
> >         eor     x1, x0, x0, asr 63
> >         sub     x1, x1, x0, asr 63
> >         mov     x0, x1
> >         ret
> > 
> > After:
> >         adds    w0, w0, 1
> >         csneg   w0, w0, w0, pl
> >         ret
> > 
> > ChangeLog:
> > 
> > 2015-03-03  Wilco Dijkstra  <wdijkstr@arm.com>
> > 
> > 	* gcc/config/aarch64/aarch64.md (absdi2): optimize abs expansion.
> > 	(csneg3<mode>_insn): enable expansion of pattern.
> > 	* gcc/testsuite/gcc.target/aarch64/abs_1.c (abs64): update test
> > 	for new abs expansion.  (abs64_in_dreg): likewise.


This looks like it breaks support for abs in a D register (for example
at the end of a loop, or extracted from Neon Intrinsics, etc).

e.g. (totally contrived...)

  int64x1_t
  abs_max (int64x2_t x, int64_t *wb)
  {
    int64_t y = vgetq_lane_s64 (x, 0);
    if (y < 0)
      y = -y;
    return vdup_n_s64 (y);
  }

Which currently generates:

  abs_max:
          abs     d0, d0
          ret

I suppose we don't need to worry too much about that (and the current
implementation doesn't seem to catch it reliably anyway), but it would be
good if we could keep the support - even if it is rarely used.

Thanks,
James

> > 
> > ---
> >  gcc/config/aarch64/aarch64.md            | 33 +++++++-------------------------
> >  gcc/testsuite/gcc.target/aarch64/abs_1.c |  5 ++---
> >  2 files changed, 9 insertions(+), 29 deletions(-)
> > 
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 1f4169e..46b7a63 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -2172,35 +2172,16 @@
> >    [(set_attr "type" "alu_ext")]
> >  )
> > 
> > -(define_insn_and_split "absdi2"
> > -  [(set (match_operand:DI 0 "register_operand" "=&r,w")
> > -	(abs:DI (match_operand:DI 1 "register_operand" "r,w")))]
> > +(define_expand "abs<mode>2"
> > +  [(match_operand:GPI 0 "register_operand" "")
> > +   (match_operand:GPI 1 "register_operand" "")]
> >    ""
> > -  "@
> > -   #
> > -   abs\\t%d0, %d1"
> > -  "reload_completed
> > -   && GP_REGNUM_P (REGNO (operands[0]))
> > -   && GP_REGNUM_P (REGNO (operands[1]))"
> > -  [(const_int 0)]
> >    {
> > -    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
> > -			    gen_rtx_XOR (DImode,
> > -					 gen_rtx_ASHIFTRT (DImode,
> > -							   operands[1],
> > -							   GEN_INT (63)),
> > -					 operands[1])));
> > -    emit_insn (gen_rtx_SET (VOIDmode,
> > -			    operands[0],
> > -			    gen_rtx_MINUS (DImode,
> > -					   operands[0],
> > -					   gen_rtx_ASHIFTRT (DImode,
> > -							     operands[1],
> > -							     GEN_INT (63)))));
> > +    rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
> > +    rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx);
> > +    emit_insn (gen_csneg3<mode>_insn (operands[0], x, operands[1], operands[1]));
> >      DONE;
> >    }
> > -  [(set_attr "type" "alu_sreg")
> > -   (set_attr "simd" "no,yes")]
> >  )
> > 
> >  (define_insn "neg<mode>2"
> > @@ -2879,7 +2860,7 @@
> >    [(set_attr "type" "csel")]
> >  )
> > 
> > -(define_insn "*csneg3<mode>_insn"
> > +(define_insn "csneg3<mode>_insn"
> >    [(set (match_operand:GPI 0 "register_operand" "=r")
> >          (if_then_else:GPI
> >  	  (match_operand 1 "aarch64_comparison_operation" "")
> > diff --git a/gcc/testsuite/gcc.target/aarch64/abs_1.c
> > b/gcc/testsuite/gcc.target/aarch64/abs_1.c
> > index 938bc84..11f1095 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/abs_1.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/abs_1.c
> > @@ -7,15 +7,14 @@ extern void abort (void);
> >  long long
> >  abs64 (long long a)
> >  {
> > -  /* { dg-final { scan-assembler "eor\t" } } */
> > -  /* { dg-final { scan-assembler "sub\t" } } */
> > +  /* { dg-final { scan-assembler "csneg\t" } } */
> >    return llabs (a);
> >  }
> > 
> >  long long
> >  abs64_in_dreg (long long a)
> >  {
> > -  /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */
> > +  /* { dg-final { scan-assembler "csneg\t" } } */
> >    register long long x asm ("d8") = a;
> >    register long long y asm ("d9");
> >    asm volatile ("" : : "w" (x));
> > --
> > 1.9.1
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-04-27 16:26 ` James Greenhalgh
@ 2015-04-27 16:57   ` Wilco Dijkstra
  2015-05-14  9:43     ` James Greenhalgh
  0 siblings, 1 reply; 11+ messages in thread
From: Wilco Dijkstra @ 2015-04-27 16:57 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: 'GCC Patches'

> James Greenhalgh wrote:
> On Mon, Apr 27, 2015 at 02:42:36PM +0100, Wilco Dijkstra wrote:
> > > -----Original Message-----
> > > From: Wilco Dijkstra [mailto:wdijkstr@arm.com]
> > > Sent: 03 March 2015 16:19
> > > To: GCC Patches
> > > Subject: [PATCH][AArch64] Use conditional negate for abs expansion
> > >
> > > Expand abs into a compare and conditional negate. This is the most obvious expansion,
> enables
> > > merging of the comparison into ALU instructions and is faster on all implementations.
> > > Bootstrapped & regression tested.
> > >
> > > int f(int x) { return abs (x + 1); }
> > >
> > > Before:
> > >         add     w0, w0, 1
> > >         sxtw    x0, w0
> > >         eor     x1, x0, x0, asr 63
> > >         sub     x1, x1, x0, asr 63
> > >         mov     x0, x1
> > >         ret
> > >
> > > After:
> > >         adds    w0, w0, 1
> > >         csneg   w0, w0, w0, pl
> > >         ret
> > >
> > > ChangeLog:
> > >
> > > 2015-03-03  Wilco Dijkstra  <wdijkstr@arm.com>
> > >
> > > 	* gcc/config/aarch64/aarch64.md (absdi2): optimize abs expansion.
> > > 	(csneg3<mode>_insn): enable expansion of pattern.
> > > 	* gcc/testsuite/gcc.target/aarch64/abs_1.c (abs64): update test
> > > 	for new abs expansion.  (abs64_in_dreg): likewise.
> 
> 
> This looks like it breaks support for abs in a D register (for example
> at the end of a loop, or extracted from Neon Intrinsics, etc).
> 
> e.g. (totally contrived...)
> 
>   int64x1_t
>   abs_max (int64x2_t x, int64_t *wb)
>   {
>     int64_t y = vgetq_lane_s64 (x, 0);
>     if (y < 0)
>       y = -y;
>     return vdup_n_s64 (y);
>   }
> 
> Which currently generates:
> 
>   abs_max:
>           abs     d0, d0
>           ret
> 
> I suppose we don't need to worry too much about that (and the current
> implementation doesn't seem to catch it reliably anyway), but it would be
> good if we could keep the support - even if it is rarely used.

Well it may be possible to write a peephole for this rare case, but I hope
people would use a vabs if that's what they wanted...

Wilco


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-04-27 16:57   ` Wilco Dijkstra
@ 2015-05-14  9:43     ` James Greenhalgh
  2015-05-14 13:25       ` Wilco Dijkstra
  0 siblings, 1 reply; 11+ messages in thread
From: James Greenhalgh @ 2015-05-14  9:43 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: 'GCC Patches'

On Mon, Apr 27, 2015 at 05:57:26PM +0100, Wilco Dijkstra wrote:
> > James Greenhalgh wrote:
> > On Mon, Apr 27, 2015 at 02:42:36PM +0100, Wilco Dijkstra wrote:
> > > > -----Original Message-----
> > > > From: Wilco Dijkstra [mailto:wdijkstr@arm.com]
> > > > Sent: 03 March 2015 16:19
> > > > To: GCC Patches
> > > > Subject: [PATCH][AArch64] Use conditional negate for abs expansion
> > > >
> > > > Expand abs into a compare and conditional negate. This is the most obvious expansion,
> > enables
> > > > merging of the comparison into ALU instructions and is faster on all implementations.
> > > > Bootstrapped & regression tested.
> > > >
> > > > int f(int x) { return abs (x + 1); }
> > > >
> > > > Before:
> > > >         add     w0, w0, 1
> > > >         sxtw    x0, w0
> > > >         eor     x1, x0, x0, asr 63
> > > >         sub     x1, x1, x0, asr 63
> > > >         mov     x0, x1
> > > >         ret
> > > >
> > > > After:
> > > >         adds    w0, w0, 1
> > > >         csneg   w0, w0, w0, pl
> > > >         ret
> > > >
> > > > ChangeLog:
> > > >
> > > > 2015-03-03  Wilco Dijkstra  <wdijkstr@arm.com>
> > > >
> > > > 	* gcc/config/aarch64/aarch64.md (absdi2): optimize abs expansion.
> > > > 	(csneg3<mode>_insn): enable expansion of pattern.
> > > > 	* gcc/testsuite/gcc.target/aarch64/abs_1.c (abs64): update test
> > > > 	for new abs expansion.  (abs64_in_dreg): likewise.
> > 
> > 
> > This looks like it breaks support for abs in a D register (for example
> > at the end of a loop, or extracted from Neon Intrinsics, etc).
> > 
> > e.g. (totally contrived...)
> > 
> >   int64x1_t
> >   abs_max (int64x2_t x, int64_t *wb)
> >   {
> >     int64_t y = vgetq_lane_s64 (x, 0);
> >     if (y < 0)
> >       y = -y;
> >     return vdup_n_s64 (y);
> >   }
> > 
> > Which currently generates:
> > 
> >   abs_max:
> >           abs     d0, d0
> >           ret
> > 
> > I suppose we don't need to worry too much about that (and the current
> > implementation doesn't seem to catch it reliably anyway), but it would be
> > good if we could keep the support - even if it is rarely used.
> 
> Well it may be possible to write a peephole for this rare case, but I hope
> people would use a vabs if that's what they wanted...

OK, I've seen some of the pain of our current abs expansion code, so I
suppose that we want to get the common case right first, rather
than be too concerned about contrived corner cases.

This patch is OK.

Thanks,
James

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-05-14  9:43     ` James Greenhalgh
@ 2015-05-14 13:25       ` Wilco Dijkstra
  0 siblings, 0 replies; 11+ messages in thread
From: Wilco Dijkstra @ 2015-05-14 13:25 UTC (permalink / raw)
  To: Jiong Wang; +Cc: 'GCC Patches'

[-- Attachment #1: Type: text/plain, Size: 2726 bytes --]

> James Greenhalgh wrote:
> On Mon, Apr 27, 2015 at 05:57:26PM +0100, Wilco Dijkstra wrote:
> > > James Greenhalgh wrote:
> > > On Mon, Apr 27, 2015 at 02:42:36PM +0100, Wilco Dijkstra wrote:
> > > > > -----Original Message-----
> > > > > From: Wilco Dijkstra [mailto:wdijkstr@arm.com]
> > > > > Sent: 03 March 2015 16:19
> > > > > To: GCC Patches
> > > > > Subject: [PATCH][AArch64] Use conditional negate for abs expansion
> > > > >
> > > > > Expand abs into a compare and conditional negate. This is the most obvious expansion,
> > > enables
> > > > > merging of the comparison into ALU instructions and is faster on all implementations.
> > > > > Bootstrapped & regression tested.
> > > > >
> > > > > int f(int x) { return abs (x + 1); }
> > > > >
> > > > > Before:
> > > > >         add     w0, w0, 1
> > > > >         sxtw    x0, w0
> > > > >         eor     x1, x0, x0, asr 63
> > > > >         sub     x1, x1, x0, asr 63
> > > > >         mov     x0, x1
> > > > >         ret
> > > > >
> > > > > After:
> > > > >         adds    w0, w0, 1
> > > > >         csneg   w0, w0, w0, pl
> > > > >         ret
> > > > >
> > > > > ChangeLog:
> > > > >
> > > > > 2015-03-03  Wilco Dijkstra  <wdijkstr@arm.com>
> > > > >
> > > > > 	* gcc/config/aarch64/aarch64.md (absdi2): optimize abs expansion.
> > > > > 	(csneg3<mode>_insn): enable expansion of pattern.
> > > > > 	* gcc/testsuite/gcc.target/aarch64/abs_1.c (abs64): update test
> > > > > 	for new abs expansion.  (abs64_in_dreg): likewise.
> > >
> > >
> > > This looks like it breaks support for abs in a D register (for example
> > > at the end of a loop, or extracted from Neon Intrinsics, etc).
> > >
> > > e.g. (totally contrived...)
> > >
> > >   int64x1_t
> > >   abs_max (int64x2_t x, int64_t *wb)
> > >   {
> > >     int64_t y = vgetq_lane_s64 (x, 0);
> > >     if (y < 0)
> > >       y = -y;
> > >     return vdup_n_s64 (y);
> > >   }
> > >
> > > Which currently generates:
> > >
> > >   abs_max:
> > >           abs     d0, d0
> > >           ret
> > >
> > > I suppose we don't need to worry too much about that (and the current
> > > implementation doesn't seem to catch it reliably anyway), but it would be
> > > good if we could keep the support - even if it is rarely used.
> >
> > Well it may be possible to write a peephole for this rare case, but I hope
> > people would use a vabs if that's what they wanted...
> 
> OK, I've seen some of the pain of our current abs expansion code, so I
> suppose that we want to get the common case right first, rather
> than be too concerned about contrived corner cases.
> 
> This patch is OK.

Jiong, could you check this in please?

Wilco


[-- Attachment #2: 0001-Improve-abs-expansion.txt --]
[-- Type: text/plain, Size: 2692 bytes --]

---
 gcc/config/aarch64/aarch64.md            | 33 +++++++-------------------------
 gcc/testsuite/gcc.target/aarch64/abs_1.c |  5 ++---
 2 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1f4169e..46b7a63 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2172,35 +2172,16 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn_and_split "absdi2"
-  [(set (match_operand:DI 0 "register_operand" "=&r,w")
-	(abs:DI (match_operand:DI 1 "register_operand" "r,w")))]
+(define_expand "abs<mode>2"
+  [(match_operand:GPI 0 "register_operand" "")
+   (match_operand:GPI 1 "register_operand" "")]
   ""
-  "@
-   #
-   abs\\t%d0, %d1"
-  "reload_completed
-   && GP_REGNUM_P (REGNO (operands[0]))
-   && GP_REGNUM_P (REGNO (operands[1]))"
-  [(const_int 0)]
   {
-    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-			    gen_rtx_XOR (DImode,
-					 gen_rtx_ASHIFTRT (DImode,
-							   operands[1],
-							   GEN_INT (63)),
-					 operands[1])));
-    emit_insn (gen_rtx_SET (VOIDmode,
-			    operands[0],
-			    gen_rtx_MINUS (DImode,
-					   operands[0],
-					   gen_rtx_ASHIFTRT (DImode,
-							     operands[1],
-							     GEN_INT (63)))));
+    rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
+    rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx);
+    emit_insn (gen_csneg3<mode>_insn (operands[0], x, operands[1], operands[1]));
     DONE;
   }
-  [(set_attr "type" "alu_sreg")
-   (set_attr "simd" "no,yes")]
 )
 
 (define_insn "neg<mode>2"
@@ -2879,7 +2860,7 @@
   [(set_attr "type" "csel")]
 )
 
-(define_insn "*csneg3<mode>_insn"
+(define_insn "csneg3<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (if_then_else:GPI
 	  (match_operand 1 "aarch64_comparison_operation" "")
diff --git a/gcc/testsuite/gcc.target/aarch64/abs_1.c b/gcc/testsuite/gcc.target/aarch64/abs_1.c
index 938bc84..11f1095 100644
--- a/gcc/testsuite/gcc.target/aarch64/abs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/abs_1.c
@@ -7,15 +7,14 @@ extern void abort (void);
 long long
 abs64 (long long a)
 {
-  /* { dg-final { scan-assembler "eor\t" } } */
-  /* { dg-final { scan-assembler "sub\t" } } */
+  /* { dg-final { scan-assembler "csneg\t" } } */
   return llabs (a);
 }
 
 long long
 abs64_in_dreg (long long a)
 {
-  /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */
+  /* { dg-final { scan-assembler "csneg\t" } } */
   register long long x asm ("d8") = a;
   register long long y asm ("d9");
   asm volatile ("" : : "w" (x));
-- 
1.9.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-03-04 15:00       ` Wilco Dijkstra
@ 2015-03-04 15:16         ` Maxim Kuvyrkov
  0 siblings, 0 replies; 11+ messages in thread
From: Maxim Kuvyrkov @ 2015-03-04 15:16 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches

On Mar 4, 2015, at 6:00 PM, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> 
>> Maxim Kuvyrkov wrote:
>>> On Mar 4, 2015, at 3:30 PM, Wilco Dijkstra <wdijkstr@arm.com> wrote:
>>> 
>>>> Maxim Kuvyrkov wrote:
>>>> 
>>>> You are removing the 2nd alternative that generates "abs" with your patch.  While I agree
>> that
>>>> using "csneg" is faster on all implementations, can you say the same for "abs"?  Especially
>>>> given the fact that csneg requires 4 operands instead of abs'es 2?
>>> 
>>> Yes, given that latencies of scalar SIMD instructions are typically worse than integer
>>> latencies. The number of operands is not an issue.
>> 
>> One could make an argument that having an opportunity to use FP registers in high-reg-pressure
>> code is valuable.  But ... I can take your word for it.  All-in-all, I don't have objections
>> to your patch (note, this is a review, not approval, since I'm not an ARM maintainer).
> 
> The idea of spilling between integer and FP register files sounds great but GCC doesn't
> support it unfortunately...
> 
>>>> Wouldn't it be better to have (define_expand "abs<mode>2") that would expand into either
>>>> csneg3<mode> or second alternative of current absdi2?
>>> 
>>> How would that be possible? You'd have to delay expansion until after register allocation,
>>> which loses optimization opportunities.
>> 
>> If abs<mode>2 define_expand is given arguments in DI mode that are FP register -- emit
>> absdi2_insn.  Otherwise (which will be 95% of the time) do what your patch does.
> 
> I don't understand how that could ever happen - the new expansion happens before register
> allocation, so you'd never see FP registers as arguments.

A new instruction can be generated after reload by a folding optimizations or combine or other late optimizations.  It's rare, but happens.

> 
>>> And I still don't see how it would ever make sense
>>> to execute integer operations as scalar SIMD.
>>> 
>> 
>> Many GCC ports exploit ability to use FP registers for storage of integer values for the
>> benefit of high-reg-pressure code.  Ability to do some basic operations (like abs) on such
>> integer values is beneficial for the same reason.
> 
> I haven't seen any benefit - my previous patches severely discourage this kind of bad 
> allocation and gave major performance gains. Despite that it still happens way too often.
> 
>> However, almost always FP-alternatives are not the only ones in the instruction pattern, and
>> they are discouraged with "!".  If you can't use "!" to discourage them (which you can't in a
>> single-alternative case, like we got here) then your approach makes sense.  [Yes, I know, you
>> could fit that FP alternative into csneg pattern, but, probably, you would not want to spend
>> your time on it, and I don't think it's important enough to ask you to.]
> 
> Did you mean '?' ? It appears '?' discourages incorrect allocations while '*' seems to make
> things worse. Besides that, there are various issues with the cost calculation in ira-costs.c
> that result in incorrect register preferences.

Yes, I meant "*" and "?".

Thanks,

--
Maxim Kuvyrkov
www.linaro.org


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-03-04 12:49     ` Maxim Kuvyrkov
@ 2015-03-04 15:00       ` Wilco Dijkstra
  2015-03-04 15:16         ` Maxim Kuvyrkov
  0 siblings, 1 reply; 11+ messages in thread
From: Wilco Dijkstra @ 2015-03-04 15:00 UTC (permalink / raw)
  To: 'Maxim Kuvyrkov'; +Cc: GCC Patches

> Maxim Kuvyrkov wrote:
> > On Mar 4, 2015, at 3:30 PM, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> >
> >> Maxim Kuvyrkov wrote:
> >>
> >> You are removing the 2nd alternative that generates "abs" with your patch.  While I agree
> that
> >> using "csneg" is faster on all implementations, can you say the same for "abs"?  Especially
> >> given the fact that csneg requires 4 operands instead of abs'es 2?
> >
> > Yes, given that latencies of scalar SIMD instructions are typically worse than integer
> > latencies. The number of operands is not an issue.
> 
> One could make an argument that having an opportunity to use FP registers in high-reg-pressure
> code is valuable.  But ... I can take your word for it.  All-in-all, I don't have objections
> to your patch (note, this is a review, not approval, since I'm not an ARM maintainer).

The idea of spilling between integer and FP register files sounds great but GCC doesn't
support it unfortunately...

> >> Wouldn't it be better to have (define_expand "abs<mode>2") that would expand into either
> >> csneg3<mode> or second alternative of current absdi2?
> >
> > How would that be possible? You'd have to delay expansion until after register allocation,
> > which loses optimization opportunities.
> 
> If abs<mode>2 define_expand is given arguments in DI mode that are FP register -- emit
> absdi2_insn.  Otherwise (which will be 95% of the time) do what your patch does.

I don't understand how that could ever happen - the new expansion happens before register
allocation, so you'd never see FP registers as arguments.

> > And I still don't see how it would ever make sense
> > to execute integer operations as scalar SIMD.
> >
> 
> Many GCC ports exploit ability to use FP registers for storage of integer values for the
> benefit of high-reg-pressure code.  Ability to do some basic operations (like abs) on such
> integer values is beneficial for the same reason.

I haven't seen any benefit - my previous patches severely discourage this kind of bad 
allocation and gave major performance gains. Despite that it still happens way too often.

> However, almost always FP-alternatives are not the only ones in the instruction pattern, and
> they are discouraged with "!".  If you can't use "!" to discourage them (which you can't in a
> single-alternative case, like we got here) then your approach makes sense.  [Yes, I know, you
> could fit that FP alternative into csneg pattern, but, probably, you would not want to spend
> your time on it, and I don't think it's important enough to ask you to.]

Did you mean '?' ? It appears '?' discourages incorrect allocations while '*' seems to make
things worse. Besides that, there are various issues with the cost calculation in ira-costs.c
that result in incorrect register preferences.

Wilco



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-03-04 12:31   ` Wilco Dijkstra
@ 2015-03-04 12:49     ` Maxim Kuvyrkov
  2015-03-04 15:00       ` Wilco Dijkstra
  0 siblings, 1 reply; 11+ messages in thread
From: Maxim Kuvyrkov @ 2015-03-04 12:49 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches

> On Mar 4, 2015, at 3:30 PM, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> 
>> Maxim Kuvyrkov wrote:
>> 
>> You are removing the 2nd alternative that generates "abs" with your patch.  While I agree that
>> using "csneg" is faster on all implementations, can you say the same for "abs"?  Especially
>> given the fact that csneg requires 4 operands instead of abs'es 2?
> 
> Yes, given that latencies of scalar SIMD instructions are typically worse than integer
> latencies. The number of operands is not an issue.

One could make an argument that having an opportunity to use FP registers in high-reg-pressure code is valuable.  But ... I can take your word for it.  All-in-all, I don't have objections to your patch (note, this is a review, not approval, since I'm not an ARM maintainer).

> 
>> Wouldn't it be better to have (define_expand "abs<mode>2") that would expand into either
>> csneg3<mode> or second alternative of current absdi2?
> 
> How would that be possible? You'd have to delay expansion until after register allocation,
> which loses optimization opportunities.

If abs<mode>2 define_expand is given arguments in DI mode that are FP register -- emit absdi2_insn.  Otherwise (which will be 95% of the time) do what your patch does.

> And I still don't see how it would ever make sense 
> to execute integer operations as scalar SIMD.
> 

Many GCC ports exploit ability to use FP registers for storage of integer values for the benefit of high-reg-pressure code.  Ability to do some basic operations (like abs) on such integer values is beneficial for the same reason.

However, almost always FP-alternatives are not the only ones in the instruction pattern, and they are discouraged with "!".  If you can't use "!" to discourage them (which you can't in a single-alternative case, like we got here) then your approach makes sense.  [Yes, I know, you could fit that FP alternative into csneg pattern, but, probably, you would not want to spend your time on it, and I don't think it's important enough to ask you to.]

Thanks,

--
Maxim Kuvyrkov
www.linaro.org

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-03-04  9:18 ` Maxim Kuvyrkov
@ 2015-03-04 12:31   ` Wilco Dijkstra
  2015-03-04 12:49     ` Maxim Kuvyrkov
  0 siblings, 1 reply; 11+ messages in thread
From: Wilco Dijkstra @ 2015-03-04 12:31 UTC (permalink / raw)
  To: 'Maxim Kuvyrkov'; +Cc: GCC Patches

> Maxim Kuvyrkov wrote:
>
> You are removing the 2nd alternative that generates "abs" with your patch.  While I agree that
> using "csneg" is faster on all implementations, can you say the same for "abs"?  Especially
> given the fact that csneg requires 4 operands instead of abs'es 2?

Yes, given that latencies of scalar SIMD instructions are typically worse than integer
latencies. The number of operands is not an issue.

> Wouldn't it be better to have (define_expand "abs<mode>2") that would expand into either
> csneg3<mode> or second alternative of current absdi2?

How would that be possible? You'd have to delay expansion until after register allocation,
which loses optimization opportunities. And I still don't see how it would ever make sense 
to execute integer operations as scalar SIMD.

Wilco

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][AArch64] Use conditional negate for abs expansion
  2015-03-03 16:19 Wilco Dijkstra
@ 2015-03-04  9:18 ` Maxim Kuvyrkov
  2015-03-04 12:31   ` Wilco Dijkstra
  0 siblings, 1 reply; 11+ messages in thread
From: Maxim Kuvyrkov @ 2015-03-04  9:18 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches

> On Mar 3, 2015, at 7:19 PM, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> 
> Expand abs into a compare and conditional negate. This is the most obvious expansion, enables
> merging of the comparison into ALU instructions and is faster on all implementations.



> Bootstrapped &
> regression tested.
> 
> int f(int x) { return abs (x + 1); }
> 
> Before:
>        add     w0, w0, 1
>        sxtw    x0, w0
>        eor     x1, x0, x0, asr 63
>        sub     x1, x1, x0, asr 63
>        mov     x0, x1
>        ret
> 
> After:
>        adds    w0, w0, 1
>        csneg   w0, w0, w0, pl
>        ret
> 
> ChangeLog:
> 
> 2015-03-03  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* gcc/config/aarch64/aarch64.md (absdi2): optimize abs expansion.
> 	(csneg3<mode>_insn): enable expansion of pattern.
> 	* gcc/testsuite/gcc.target/aarch64/abs_1.c (abs64): update test
> 	for new abs expansion.  (abs64_in_dreg): likewise.
> 
> ---
> gcc/config/aarch64/aarch64.md            | 33 +++++++-------------------------
> gcc/testsuite/gcc.target/aarch64/abs_1.c |  5 ++---
> 2 files changed, 9 insertions(+), 29 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 1f4169e..46b7a63 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -2172,35 +2172,16 @@
>   [(set_attr "type" "alu_ext")]
> )
> 
> -(define_insn_and_split "absdi2"
> -  [(set (match_operand:DI 0 "register_operand" "=&r,w")
> -	(abs:DI (match_operand:DI 1 "register_operand" "r,w")))]
> +(define_expand "abs<mode>2"
> +  [(match_operand:GPI 0 "register_operand" "")
> +   (match_operand:GPI 1 "register_operand" "")]
>   ""
> -  "@
> -   #
> -   abs\\t%d0, %d1"

You are removing the 2nd alternative that generates "abs" with your patch.  While I agree that using "csneg" is faster on all implementations, can you say the same for "abs"?  Especially given the fact that csneg requires 4 operands instead of abs'es 2?

Wouldn't it be better to have (define_expand "abs<mode>2") that would expand into either csneg3<mode> or second alternative of current absdi2?

Other than this, the patch looks OK.

Thank you,

--
Maxim Kuvyrkov
www.linaro.org


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH][AArch64] Use conditional negate for abs expansion
@ 2015-03-03 16:19 Wilco Dijkstra
  2015-03-04  9:18 ` Maxim Kuvyrkov
  0 siblings, 1 reply; 11+ messages in thread
From: Wilco Dijkstra @ 2015-03-03 16:19 UTC (permalink / raw)
  To: GCC Patches

Expand abs into a compare and conditional negate. This is the most obvious expansion, enables
merging of the comparison into ALU instructions and is faster on all implementations. Bootstrapped &
regression tested.

int f(int x) { return abs (x + 1); }

Before:
        add     w0, w0, 1
        sxtw    x0, w0
        eor     x1, x0, x0, asr 63
        sub     x1, x1, x0, asr 63
        mov     x0, x1
        ret

After:
        adds    w0, w0, 1
        csneg   w0, w0, w0, pl
        ret

ChangeLog:

2015-03-03  Wilco Dijkstra  <wdijkstr@arm.com>

	* gcc/config/aarch64/aarch64.md (absdi2): optimize abs expansion.
	(csneg3<mode>_insn): enable expansion of pattern.
	* gcc/testsuite/gcc.target/aarch64/abs_1.c (abs64): update test
	for new abs expansion.  (abs64_in_dreg): likewise.

---
 gcc/config/aarch64/aarch64.md            | 33 +++++++-------------------------
 gcc/testsuite/gcc.target/aarch64/abs_1.c |  5 ++---
 2 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1f4169e..46b7a63 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2172,35 +2172,16 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn_and_split "absdi2"
-  [(set (match_operand:DI 0 "register_operand" "=&r,w")
-	(abs:DI (match_operand:DI 1 "register_operand" "r,w")))]
+(define_expand "abs<mode>2"
+  [(match_operand:GPI 0 "register_operand" "")
+   (match_operand:GPI 1 "register_operand" "")]
   ""
-  "@
-   #
-   abs\\t%d0, %d1"
-  "reload_completed
-   && GP_REGNUM_P (REGNO (operands[0]))
-   && GP_REGNUM_P (REGNO (operands[1]))"
-  [(const_int 0)]
   {
-    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-			    gen_rtx_XOR (DImode,
-					 gen_rtx_ASHIFTRT (DImode,
-							   operands[1],
-							   GEN_INT (63)),
-					 operands[1])));
-    emit_insn (gen_rtx_SET (VOIDmode,
-			    operands[0],
-			    gen_rtx_MINUS (DImode,
-					   operands[0],
-					   gen_rtx_ASHIFTRT (DImode,
-							     operands[1],
-							     GEN_INT (63)))));
+    rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
+    rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx);
+    emit_insn (gen_csneg3<mode>_insn (operands[0], x, operands[1], operands[1]));
     DONE;
   }
-  [(set_attr "type" "alu_sreg")
-   (set_attr "simd" "no,yes")]
 )
 
 (define_insn "neg<mode>2"
@@ -2879,7 +2860,7 @@
   [(set_attr "type" "csel")]
 )
 
-(define_insn "*csneg3<mode>_insn"
+(define_insn "csneg3<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (if_then_else:GPI
 	  (match_operand 1 "aarch64_comparison_operation" "")
diff --git a/gcc/testsuite/gcc.target/aarch64/abs_1.c b/gcc/testsuite/gcc.target/aarch64/abs_1.c
index 938bc84..11f1095 100644
--- a/gcc/testsuite/gcc.target/aarch64/abs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/abs_1.c
@@ -7,15 +7,14 @@ extern void abort (void);
 long long
 abs64 (long long a)
 {
-  /* { dg-final { scan-assembler "eor\t" } } */
-  /* { dg-final { scan-assembler "sub\t" } } */
+  /* { dg-final { scan-assembler "csneg\t" } } */
   return llabs (a);
 }
 
 long long
 abs64_in_dreg (long long a)
 {
-  /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */
+  /* { dg-final { scan-assembler "csneg\t" } } */
   register long long x asm ("d8") = a;
   register long long y asm ("d9");
   asm volatile ("" : : "w" (x));
-- 
1.9.1



^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-05-14 13:23 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-27 13:42 [PATCH][AArch64] Use conditional negate for abs expansion Wilco Dijkstra
2015-04-27 16:26 ` James Greenhalgh
2015-04-27 16:57   ` Wilco Dijkstra
2015-05-14  9:43     ` James Greenhalgh
2015-05-14 13:25       ` Wilco Dijkstra
  -- strict thread matches above, loose matches on Subject: below --
2015-03-03 16:19 Wilco Dijkstra
2015-03-04  9:18 ` Maxim Kuvyrkov
2015-03-04 12:31   ` Wilco Dijkstra
2015-03-04 12:49     ` Maxim Kuvyrkov
2015-03-04 15:00       ` Wilco Dijkstra
2015-03-04 15:16         ` Maxim Kuvyrkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).