public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [ARC PATCH] Improve DImode left shift by a single bit.
@ 2023-10-28 13:05 Roger Sayle
  2023-10-30 15:09 ` Jeff Law
  2023-11-03  9:04 ` Claudiu Zissulescu Ianculescu
  0 siblings, 2 replies; 5+ messages in thread
From: Roger Sayle @ 2023-10-28 13:05 UTC (permalink / raw)
  To: gcc-patches; +Cc: 'Claudiu Zissulescu'

[-- Attachment #1: Type: text/plain, Size: 1841 bytes --]


This patch improves the code generated for X << 1 (and for X + X) when
X is 64-bit DImode, using the same two instruction code sequence used
for DImode addition.

For the test case:

long long foo(long long x) { return x << 1; }

GCC -O2 currently generates the following code:

foo:    lsr     r2,r0,31
        asl_s   r1,r1,1
        asl_s   r0,r0,1
        j_s.d   [blink]
        or_s    r1,r1,r2

and on CPU without a barrel shifter, i.e. -mcpu=em

foo:    add.f   0,r0,r0
        asl_s   r1,r1
        rlc     r2,0
        asl_s   r0,r0
        j_s.d   [blink]
        or_s    r1,r1,r2

with this patch (both with and without a barrel shifter):

foo:    add.f   r0,r0,r0
        j_s.d   [blink]
        adc     r1,r1,r1

[For Jeff Law's benefit a similar optimization is also applicable to
H8300H, that could also use a two instruction sequence (plus rts) but
currently GCC generates 16 instructions (plus an rts) for foo above.]

Tested with a cross-compiler to arc-linux hosted on x86_64,
with no new (compile-only) regressions from make -k check.
Ok for mainline if this passes Claudiu's nightly testing?

2023-10-28  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/arc/arc.md (addsi3): Fix GNU-style code formatting.
        (adddi3): Change define_expand to generate an *adddi3.
        (*adddi3): New define_insn_and_split to lower DImode additions
        during the split1 pass (after combine and before reload).
        (ashldi3): New define_expand to (only) generate *ashldi3_cnt1
        for DImode left shifts by a single bit.
        (*ashldi3_cnt1): New define_insn_and_split to lower DImode
        left shifts by one bit to an *adddi3.

gcc/testsuite/ChangeLog
        * gcc.target/arc/adddi3-1.c: New test case.
        * gcc.target/arc/ashldi3-1.c: Likewise.


Thanks in advance,
Roger
--


[-- Attachment #2: patchar2.txt --]
[-- Type: text/plain, Size: 3550 bytes --]

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index ee43887..fe5f48c 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -2675,19 +2675,28 @@ archs4x, archs4xd"
 	(plus:SI (match_operand:SI 1 "register_operand" "")
 		 (match_operand:SI 2 "nonmemory_operand" "")))]
   ""
-  "if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false))
-     {
-       operands[2]=force_reg(SImode, operands[2]);
-     }
-  ")
+{
+  if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false))
+    operands[2] = force_reg (SImode, operands[2]);
+})
 
 (define_expand "adddi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand" "")
+	    (plus:DI (match_operand:DI 1 "register_operand" "")
+		     (match_operand:DI 2 "nonmemory_operand" "")))
+       (clobber (reg:CC CC_REG))])])
+
+(define_insn_and_split "*adddi3"
   [(set (match_operand:DI 0 "register_operand" "")
 	(plus:DI (match_operand:DI 1 "register_operand" "")
 		 (match_operand:DI 2 "nonmemory_operand" "")))
    (clobber (reg:CC CC_REG))]
-  ""
-  "
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
   rtx l0 = gen_lowpart (SImode, operands[0]);
   rtx h0 = gen_highpart (SImode, operands[0]);
   rtx l1 = gen_lowpart (SImode, operands[1]);
@@ -2719,11 +2728,12 @@ archs4x, archs4xd"
 	   gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)),
 	   gen_rtx_SET (h0, plus_constant (SImode, h0, 1))));
       DONE;
-      }
+    }
   emit_insn (gen_add_f (l0, l1, l2));
   emit_insn (gen_adc (h0, h1, h2));
   DONE;
-")
+}
+  [(set_attr "length" "8")])
 
 (define_insn "add_f"
   [(set (reg:CC_C CC_REG)
@@ -3461,6 +3471,33 @@ archs4x, archs4xd"
   [(set_attr "type" "shift")
    (set_attr "length" "16,20")])
 
+;; DImode shifts
+
+(define_expand "ashldi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+	    (ashift:DI (match_operand:DI 1 "register_operand")
+		       (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+(define_insn_and_split "*ashldi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+	(ashift:DI (match_operand:DI 1 "register_operand")
+		   (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 1)))
+	      (clobber (reg:CC CC_REG))])]
+  ""
+  [(set_attr "length" "8")])
+
 ;; Rotate instructions.
 
 (define_insn "rotrsi3_insn"
diff --git a/gcc/testsuite/gcc.target/arc/adddi3-1.c b/gcc/testsuite/gcc.target/arc/adddi3-1.c
new file mode 100644
index 0000000..b3077c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/adddi3-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo(long long x, long long y)
+{
+  return x + y;
+}
+
+/* { dg-final { scan-assembler "add.f\\s+r0,r0,r2" } } */
+/* { dg-final { scan-assembler "adc\\s+r1,r1,r3" } } */
diff --git a/gcc/testsuite/gcc.target/arc/ashldi3-1.c b/gcc/testsuite/gcc.target/arc/ashldi3-1.c
new file mode 100644
index 0000000..6fe4ff4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/ashldi3-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo(long long x)
+{
+  return x << 1;
+}
+
+/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */
+/* { dg-final { scan-assembler "adc\\s+r1,r1,r1" } } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [ARC PATCH] Improve DImode left shift by a single bit.
  2023-10-28 13:05 [ARC PATCH] Improve DImode left shift by a single bit Roger Sayle
@ 2023-10-30 15:09 ` Jeff Law
  2023-10-30 15:27   ` Roger Sayle
  2023-11-03  9:04 ` Claudiu Zissulescu Ianculescu
  1 sibling, 1 reply; 5+ messages in thread
From: Jeff Law @ 2023-10-30 15:09 UTC (permalink / raw)
  To: Roger Sayle, gcc-patches; +Cc: 'Claudiu Zissulescu'



On 10/28/23 07:05, Roger Sayle wrote:
> 
> This patch improves the code generated for X << 1 (and for X + X) when
> X is 64-bit DImode, using the same two instruction code sequence used
> for DImode addition.
> 
> For the test case:
> 
> long long foo(long long x) { return x << 1; }
> 
> GCC -O2 currently generates the following code:
> 
> foo:    lsr     r2,r0,31
>          asl_s   r1,r1,1
>          asl_s   r0,r0,1
>          j_s.d   [blink]
>          or_s    r1,r1,r2
> 
> and on CPU without a barrel shifter, i.e. -mcpu=em
> 
> foo:    add.f   0,r0,r0
>          asl_s   r1,r1
>          rlc     r2,0
>          asl_s   r0,r0
>          j_s.d   [blink]
>          or_s    r1,r1,r2
> 
> with this patch (both with and without a barrel shifter):
> 
> foo:    add.f   r0,r0,r0
>          j_s.d   [blink]
>          adc     r1,r1,r1
> 
> [For Jeff Law's benefit a similar optimization is also applicable to
> H8300H, that could also use a two instruction sequence (plus rts) but
> currently GCC generates 16 instructions (plus an rts) for foo above.]
> 
> Tested with a cross-compiler to arc-linux hosted on x86_64,
> with no new (compile-only) regressions from make -k check.
> Ok for mainline if this passes Claudiu's nightly testing?
WRT H8.  Bug filed so we don't lose track of it.  We don't have DImode 
operations defined on the H8.  First step would be DImode loads/stores 
and basic arithmetic.

Jeff

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [ARC PATCH] Improve DImode left shift by a single bit.
  2023-10-30 15:09 ` Jeff Law
@ 2023-10-30 15:27   ` Roger Sayle
  2023-10-30 15:49     ` Jeff Law
  0 siblings, 1 reply; 5+ messages in thread
From: Roger Sayle @ 2023-10-30 15:27 UTC (permalink / raw)
  To: 'Jeff Law'; +Cc: gcc-patches

Hi Jeff,
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: 30 October 2023 15:09
> Subject: Re: [ARC PATCH] Improve DImode left shift by a single bit.
> 
> On 10/28/23 07:05, Roger Sayle wrote:
> >
> > This patch improves the code generated for X << 1 (and for X + X) when
> > X is 64-bit DImode, using the same two instruction code sequence used
> > for DImode addition.
> >
> > For the test case:
> >
> > long long foo(long long x) { return x << 1; }
> >
> > GCC -O2 currently generates the following code:
> >
> > foo:    lsr     r2,r0,31
> >          asl_s   r1,r1,1
> >          asl_s   r0,r0,1
> >          j_s.d   [blink]
> >          or_s    r1,r1,r2
> >
> > and on CPU without a barrel shifter, i.e. -mcpu=em
> >
> > foo:    add.f   0,r0,r0
> >          asl_s   r1,r1
> >          rlc     r2,0
> >          asl_s   r0,r0
> >          j_s.d   [blink]
> >          or_s    r1,r1,r2
> >
> > with this patch (both with and without a barrel shifter):
> >
> > foo:    add.f   r0,r0,r0
> >          j_s.d   [blink]
> >          adc     r1,r1,r1
> >
> > [For Jeff Law's benefit a similar optimization is also applicable to
> > H8300H, that could also use a two instruction sequence (plus rts) but
> > currently GCC generates 16 instructions (plus an rts) for foo above.]
> >
> > Tested with a cross-compiler to arc-linux hosted on x86_64, with no
> > new (compile-only) regressions from make -k check.
> > Ok for mainline if this passes Claudiu's nightly testing?
> WRT H8.  Bug filed so we don't lose track of it.  We don't have DImode operations
> defined on the H8.  First step would be DImode loads/stores and basic arithmetic.

The H8's machine description is impressively well organized.
Would it make sense to add a doubleword.md, or should DImode
support be added to each of the individual addsub.md, logical.md,
shiftrotate.md etc..?

The fact that register-to-register moves clobber some of the flags bits
must also make reload's task very difficult (impossible?).

Cheers,
Roger
--



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [ARC PATCH] Improve DImode left shift by a single bit.
  2023-10-30 15:27   ` Roger Sayle
@ 2023-10-30 15:49     ` Jeff Law
  0 siblings, 0 replies; 5+ messages in thread
From: Jeff Law @ 2023-10-30 15:49 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches



On 10/30/23 09:27, Roger Sayle wrote:

>> WRT H8.  Bug filed so we don't lose track of it.  We don't have DImode operations
>> defined on the H8.  First step would be DImode loads/stores and basic arithmetic.
> 
> The H8's machine description is impressively well organized.
> Would it make sense to add a doubleword.md, or should DImode
> support be added to each of the individual addsub.md, logical.md,
> shiftrotate.md etc..?
No strong opinion :-)  Back when I reorganized this stuff I was just 
trying to get it more manageable than a single huge .md file -- 
especially when I knew the port was going to grow 2x larger with the cc0 
conversion.

> 
> The fact that register-to-register moves clobber some of the flags bits
> must also make reload's task very difficult (impossible?).
The clobbering of CC doesn't show up until after reload.  At least 
they're not supposed to show up until then!  If they do, then it's a bug 
and a correctness issue waiting to happen.

Given the lack of registers on the H8 we never felt that supporting 
DImode in the target files was a good idea.  Just supporting a 64bit 
destructive add uses 5 of the 6 (or 7 if no frame pointer is needed) 
available registers.

Jeff

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [ARC PATCH] Improve DImode left shift by a single bit.
  2023-10-28 13:05 [ARC PATCH] Improve DImode left shift by a single bit Roger Sayle
  2023-10-30 15:09 ` Jeff Law
@ 2023-11-03  9:04 ` Claudiu Zissulescu Ianculescu
  1 sibling, 0 replies; 5+ messages in thread
From: Claudiu Zissulescu Ianculescu @ 2023-11-03  9:04 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches

Missed this one.

Ok, please proceed with the commit.

Thank you for your contribution,
Claudiu

On Sat, Oct 28, 2023 at 4:05 PM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> This patch improves the code generated for X << 1 (and for X + X) when
> X is 64-bit DImode, using the same two instruction code sequence used
> for DImode addition.
>
> For the test case:
>
> long long foo(long long x) { return x << 1; }
>
> GCC -O2 currently generates the following code:
>
> foo:    lsr     r2,r0,31
>         asl_s   r1,r1,1
>         asl_s   r0,r0,1
>         j_s.d   [blink]
>         or_s    r1,r1,r2
>
> and on CPU without a barrel shifter, i.e. -mcpu=em
>
> foo:    add.f   0,r0,r0
>         asl_s   r1,r1
>         rlc     r2,0
>         asl_s   r0,r0
>         j_s.d   [blink]
>         or_s    r1,r1,r2
>
> with this patch (both with and without a barrel shifter):
>
> foo:    add.f   r0,r0,r0
>         j_s.d   [blink]
>         adc     r1,r1,r1
>
> [For Jeff Law's benefit a similar optimization is also applicable to
> H8300H, that could also use a two instruction sequence (plus rts) but
> currently GCC generates 16 instructions (plus an rts) for foo above.]
>
> Tested with a cross-compiler to arc-linux hosted on x86_64,
> with no new (compile-only) regressions from make -k check.
> Ok for mainline if this passes Claudiu's nightly testing?
>
> 2023-10-28  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/arc/arc.md (addsi3): Fix GNU-style code formatting.
>         (adddi3): Change define_expand to generate an *adddi3.
>         (*adddi3): New define_insn_and_split to lower DImode additions
>         during the split1 pass (after combine and before reload).
>         (ashldi3): New define_expand to (only) generate *ashldi3_cnt1
>         for DImode left shifts by a single bit.
>         (*ashldi3_cnt1): New define_insn_and_split to lower DImode
>         left shifts by one bit to an *adddi3.
>
> gcc/testsuite/ChangeLog
>         * gcc.target/arc/adddi3-1.c: New test case.
>         * gcc.target/arc/ashldi3-1.c: Likewise.
>
>
> Thanks in advance,
> Roger
> --
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-11-03  9:04 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-28 13:05 [ARC PATCH] Improve DImode left shift by a single bit Roger Sayle
2023-10-30 15:09 ` Jeff Law
2023-10-30 15:27   ` Roger Sayle
2023-10-30 15:49     ` Jeff Law
2023-11-03  9:04 ` Claudiu Zissulescu Ianculescu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).