public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-5012] ARC: Improved SImode shifts and rotates with -mswap.
@ 2023-10-30 16:13 Roger Sayle
  0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2023-10-30 16:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d24c3c533454449f4bfe3db5a4c7d0eaff08e3c7

commit r14-5012-gd24c3c533454449f4bfe3db5a4c7d0eaff08e3c7
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Mon Oct 30 16:12:30 2023 +0000

    ARC: Improved SImode shifts and rotates with -mswap.
    
    This patch improves the code generated by the ARC back-end for CPUs
    without a barrel shifter but with -mswap.  The -mswap option provides
    a SWAP instruction that implements SImode rotations by 16, but also
    logical shift instructions (left and right) by 16 bits.  Clearly these
    are also useful building blocks for implementing shifts by 17, 18, etc.
    which would otherwise require a loop.
    
    As a representative example:
    int shl20 (int x) { return x << 20; }
    
    GCC with -O2 -mcpu=em -mswap would previously generate:
    
    shl20:  mov     lp_count,10
            lp      2f
            add     r0,r0,r0
            add     r0,r0,r0
    2:      # end single insn loop
            j_s     [blink]
    
    with this patch we now generate:
    
    shl20:  mov_s   r2,0    ;3
            lsl16   r0,r0
            add3    r0,r2,r0
            j_s.d   [blink]
            asl_s r0,r0
    
    Although both are four instructions (excluding the j_s),
    the original takes ~22 cycles, and replacement ~4 cycles.
    
    2023-10-30  Roger Sayle  <roger@nextmovesoftware.com>
    
    gcc/ChangeLog
            * config/arc/arc.cc (arc_split_ashl): Use lsl16 on TARGET_SWAP.
            (arc_split_ashr): Use swap and sign-extend on TARGET_SWAP.
            (arc_split_lshr): Use lsr16 on TARGET_SWAP.
            (arc_split_rotl): Use swap on TARGET_SWAP.
            (arc_split_rotr): Likewise.
            * config/arc/arc.md (ANY_ROTATE): New code iterator.
            (<ANY_ROTATE>si2_cnt16): New define_insn for alternate form of
            swap instruction on TARGET_SWAP.
            (ashlsi2_cnt16): Rename from *ashlsi16_cnt16 and move earlier.
            (lshrsi2_cnt16): New define_insn for LSR16 instruction.
            (*ashlsi2_cnt16): See above.
    
    gcc/testsuite/ChangeLog
            * gcc.target/arc/lsl16-1.c: New test case.
            * gcc.target/arc/lsr16-1.c: Likewise.
            * gcc.target/arc/swap-1.c: Likewise.
            * gcc.target/arc/swap-2.c: Likewise.

Diff:
---
 gcc/config/arc/arc.cc                  | 53 ++++++++++++++++++++++++++++++++++
 gcc/config/arc/arc.md                  | 43 ++++++++++++++++++++-------
 gcc/testsuite/gcc.target/arc/lsl16-1.c | 10 +++++++
 gcc/testsuite/gcc.target/arc/lsr16-1.c | 10 +++++++
 gcc/testsuite/gcc.target/arc/swap-1.c  |  9 ++++++
 gcc/testsuite/gcc.target/arc/swap-2.c  | 11 +++++++
 6 files changed, 125 insertions(+), 11 deletions(-)

diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index 353ac69de34d..e98692a70b4c 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -4256,6 +4256,17 @@ arc_split_ashl (rtx *operands)
 	    }
 	  return;
 	}
+      else if (n >= 16 && n <= 22 && TARGET_SWAP && TARGET_V2)
+	{
+	  emit_insn (gen_ashlsi2_cnt16 (operands[0], operands[1]));
+	  if (n > 16)
+	    {
+	      operands[1] = operands[0];
+	      operands[2] = GEN_INT (n - 16);
+	      arc_split_ashl (operands);
+	    }
+	  return;
+	}
       else if (n >= 29)
 	{
 	  if (n < 31)
@@ -4300,6 +4311,15 @@ arc_split_ashr (rtx *operands)
 	    emit_move_insn (operands[0], operands[1]);
 	  return;
 	}
+      else if (n >= 16 && n <= 18 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotrsi2_cnt16 (operands[0], operands[1]));
+	  emit_insn (gen_extendhisi2 (operands[0],
+				      gen_lowpart (HImode, operands[0])));
+	  while (--n >= 16)
+	    emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
       else if (n == 30)
 	{
 	  rtx tmp = gen_reg_rtx (SImode);
@@ -4339,6 +4359,13 @@ arc_split_lshr (rtx *operands)
 	    emit_move_insn (operands[0], operands[1]);
 	  return;
 	}
+      else if (n >= 16 && n <= 19 && TARGET_SWAP && TARGET_V2)
+	{
+	  emit_insn (gen_lshrsi2_cnt16 (operands[0], operands[1]));
+	  while (--n >= 16)
+	    emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
       else if (n == 30)
 	{
 	  rtx tmp = gen_reg_rtx (SImode);
@@ -4385,6 +4412,19 @@ arc_split_rotl (rtx *operands)
 	    emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
 	  return;
 	}
+      else if (n >= 13 && n <= 16 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotlsi2_cnt16 (operands[0], operands[1]));
+	  while (++n <= 16)
+	    emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
+      else if (n == 17 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotlsi2_cnt16 (operands[0], operands[1]));
+	  emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
       else if (n >= 16 || n == 12 || n == 14)
 	{
 	  emit_insn (gen_rotrsi3_loop (operands[0], operands[1],
@@ -4415,6 +4455,19 @@ arc_split_rotr (rtx *operands)
 	    emit_move_insn (operands[0], operands[1]);
 	  return;
 	}
+      else if (n == 15 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotrsi2_cnt16 (operands[0], operands[1]));
+	  emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
+      else if (n >= 16 && n <= 19 && TARGET_SWAP)
+	{
+	  emit_insn (gen_rotrsi2_cnt16 (operands[0], operands[1]));
+	  while (--n >= 16)
+	    emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
       else if (n >= 30)
 	{
 	  emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[1]));
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index ee438872dd23..11cd22cf1b34 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -3353,6 +3353,7 @@ archs4x, archs4xd"
 
 ;; Shift instructions.
 
+(define_code_iterator ANY_ROTATE [rotate rotatert])
 (define_code_iterator ANY_SHIFT_ROTATE [ashift ashiftrt lshiftrt
 					rotate rotatert])
 
@@ -3415,6 +3416,37 @@ archs4x, archs4xd"
    (set_attr "predicable" "no,no,yes,no,no")
    (set_attr "cond" "nocond,canuse,canuse,nocond,nocond")])
 
+(define_insn "<insn>si2_cnt16"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ANY_ROTATE:SI (match_operand:SI 1 "register_operand" "r")
+		       (const_int 16)))]
+  "TARGET_SWAP"
+  "swap\\t%0,%1"
+  [(set_attr "length" "4")
+   (set_attr "type" "two_cycle_core")])
+
+(define_insn "ashlsi2_cnt16"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(ashift:SI (match_operand:SI 1 "nonmemory_operand" "rL")
+		   (const_int 16)))]
+  "TARGET_SWAP && TARGET_V2"
+  "lsl16\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "iscompact" "false")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
+(define_insn "lshrsi2_cnt16"
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	(lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "rL")
+		     (const_int 16)))]
+  "TARGET_SWAP && TARGET_V2"
+  "lsr16\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "iscompact" "false")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
 ;; Split asl dst,1,src into bset dst,0,src.
 (define_insn_and_split "*ashlsi3_1"
   [(set (match_operand:SI 0 "dest_reg_operand")
@@ -5929,17 +5961,6 @@ archs4x, archs4xd"
    (set_attr "length" "4")
    (set_attr "predicable" "no")])
 
-(define_insn "*ashlsi2_cnt16"
-  [(set (match_operand:SI 0 "register_operand"            "=r")
-	(ashift:SI (match_operand:SI 1 "nonmemory_operand" "rL")
-		   (const_int 16)))]
-  "TARGET_SWAP && TARGET_V2"
-  "lsl16\\t%0,%1"
-  [(set_attr "type" "shift")
-   (set_attr "iscompact" "false")
-   (set_attr "length" "4")
-   (set_attr "predicable" "no")])
-
 (define_insn "lshrsi3_cnt1"
   [(set (match_operand:SI 0 "dest_reg_operand"             "=q,w")
 	(lshiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
diff --git a/gcc/testsuite/gcc.target/arc/lsl16-1.c b/gcc/testsuite/gcc.target/arc/lsl16-1.c
new file mode 100644
index 000000000000..cbd0daed0a25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/lsl16-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em -mswap" } */
+
+int foo(int x)
+{
+  return x << 16;
+}
+
+/* { dg-final { scan-assembler "lsl16\\s+r0,r0" } } */
+
diff --git a/gcc/testsuite/gcc.target/arc/lsr16-1.c b/gcc/testsuite/gcc.target/arc/lsr16-1.c
new file mode 100644
index 000000000000..8ce5f1329989
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/lsr16-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em -mswap" } */
+
+unsigned int foo(unsigned int x)
+{
+  return x >> 16;
+}
+
+/* { dg-final { scan-assembler "lsr16\\s+r0,r0" } } */
+
diff --git a/gcc/testsuite/gcc.target/arc/swap-1.c b/gcc/testsuite/gcc.target/arc/swap-1.c
new file mode 100644
index 000000000000..71afc750cb90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/swap-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em -mswap" } */
+
+int foo(int x)
+{
+  return ((unsigned int)x >> 16) | (x << 16);
+}
+
+/* { dg-final { scan-assembler "swap\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/swap-2.c b/gcc/testsuite/gcc.target/arc/swap-2.c
new file mode 100644
index 000000000000..bf123921fdd3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/swap-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em -mswap" } */
+
+int foo(int x)
+{
+  return x >> 16;
+}
+
+/* { dg-final { scan-assembler "swap\\s+r0,r0" } } */
+/* { dg-final { scan-assembler "sexh_s\\s+r0,r0" } } */
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-30 16:13 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-30 16:13 [gcc r14-5012] ARC: Improved SImode shifts and rotates with -mswap Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).