public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-4892] ARC: Improved SImode shifts and rotates on !TARGET_BARREL_SHIFTER.
@ 2023-10-24 15:44 Roger Sayle
  0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2023-10-24 15:44 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:35f4e95265b9e89c923b349988654f1da6348a44

commit r14-4892-g35f4e95265b9e89c923b349988654f1da6348a44
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Tue Oct 24 16:42:10 2023 +0100

    ARC: Improved SImode shifts and rotates on !TARGET_BARREL_SHIFTER.
    
    This patch completes the ARC back-end's transition to using pre-reload
    splitters for SImode shifts and rotates on targets without a barrel
    shifter.  The core part is that the shift_si3 define_insn is no longer
    needed, as shifts and rotates that don't require a loop are split
    before reload, and then because shift_si3_loop is the only caller
    of output_shift, both can be significantly cleaned up and simplified.
    The output_shift function (Claudiu's "the elephant in the room") is
    renamed output_shift_loop, which handles just the four instruction
    zero-overhead loop implementations.
    
    Aside from the clean-ups, the user visible changes are much improved
    implementations of SImode shifts and rotates on affected targets.
    
    For the function:
    unsigned int rotr_1 (unsigned int x) { return (x >> 1) | (x << 31); }
    
    GCC with -O2 -mcpu=em would previously generate:
    
    rotr_1: lsr_s r2,r0
            bmsk_s r0,r0,0
            ror     r0,r0
            j_s.d   [blink]
            or_s    r0,r0,r2
    
    with this patch, we now generate:
    
            j_s.d   [blink]
            ror     r0,r0
    
    For the function:
    unsigned int rotr_31 (unsigned int x) { return (x >> 31) | (x << 1); }
    
    GCC with -O2 -mcpu=em would previously generate:
    
    rotr_31:
            mov_s   r2,r0   ;4
            asl_s r0,r0
            add.f 0,r2,r2
            rlc r2,0
            j_s.d   [blink]
            or_s    r0,r0,r2
    
    with this patch we now generate an add.f followed by an adc (emitted
    here as the equivalent carry-predicated add.cs):
    
    rotr_31:
            add.f   r0,r0,r0
            j_s.d   [blink]
            add.cs  r0,r0,1
    
    Shifts by constants requiring a loop have been improved for even counts
    by performing two operations in each iteration:
    
    int shl10(int x) { return x >> 10; }
    
    Previously looked like:
    
    shl10:  mov.f lp_count, 10
            lpnz    2f
            asr r0,r0
            nop
    2:      # end single insn loop
            j_s     [blink]
    
    And now becomes:
    
    shl10:
            mov     lp_count,5
            lp      2f
            asr     r0,r0
            asr     r0,r0
    2:      # end single insn loop
            j_s     [blink]
    
    So emulating ARC's SWAP on architectures that don't have it:
    
    unsigned int rotr_16 (unsigned int x) { return (x >> 16) | (x << 16); }
    
    previously required 10 instructions and ~70 cycles:
    
    rotr_16:
            mov_s   r2,r0   ;4
            mov.f lp_count, 16
            lpnz    2f
            add r0,r0,r0
            nop
    2:      # end single insn loop
            mov.f lp_count, 16
            lpnz    2f
            lsr r2,r2
            nop
    2:      # end single insn loop
            j_s.d   [blink]
            or_s    r0,r0,r2
    
    now becomes just 4 instructions and ~18 cycles:
    
    rotr_16:
            mov     lp_count,8
            lp      2f
            ror     r0,r0
            ror     r0,r0
    2:      # end single insn loop
            j_s     [blink]
    
    2023-10-24  Roger Sayle  <roger@nextmovesoftware.com>
                Claudiu Zissulescu  <claziss@gmail.com>
    
    gcc/ChangeLog
            * config/arc/arc-protos.h (output_shift): Rename to...
            (output_shift_loop): Tweak API to take an explicit rtx_code.
            (arc_split_ashl): Prototype new function here.
            (arc_split_ashr): Likewise.
            (arc_split_lshr): Likewise.
            (arc_split_rotl): Likewise.
            (arc_split_rotr): Likewise.
            * config/arc/arc.cc (output_shift): Delete local prototype.  Rename.
            (output_shift_loop): New function replacing output_shift to output
            a zero-overhead loop for SImode shifts and rotates on ARC targets
            without barrel shifter (i.e. no hardware support for these insns).
            (arc_split_ashl): New helper function to split *ashlsi3_nobs.
            (arc_split_ashr): New helper function to split *ashrsi3_nobs.
            (arc_split_lshr): New helper function to split *lshrsi3_nobs.
            (arc_split_rotl): New helper function to split *rotlsi3_nobs.
            (arc_split_rotr): New helper function to split *rotrsi3_nobs.
            (arc_print_operand): Correct whitespace.
            (arc_rtx_costs): Likewise.
            (hwloop_optimize): Likewise.
            * config/arc/arc.md (ANY_SHIFT_ROTATE): New define_code_iterator.
            (define_code_attr insn): New code attribute to map to pattern name.
            (<ANY_SHIFT_ROTATE>si3): New expander unifying previous ashlsi3,
            ashrsi3 and lshrsi3 define_expands.  Adds rotlsi3 and rotrsi3.
            (*<ANY_SHIFT_ROTATE>si3_nobs): New define_insn_and_split that
            unifies the previous *ashlsi3_nobs, *ashrsi3_nobs and *lshrsi3_nobs.
            We now call arc_split_<insn> in arc.cc to implement each split.
            (shift_si3): Delete define_insn, all shifts/rotates are now split.
            (shift_si3_loop): Rename to...
            (<insn>si3_loop): define_insn to handle loop implementations of
            SImode shifts and rotates, calling output_shift_loop for template.
            (rotrsi3): Rename to...
            (rotrsi3_insn): define_insn for TARGET_BARREL_SHIFTER's ror.
            (*rotlsi3): New define_insn_and_split to transform left rotates
            into right rotates before reload.
            (rotlsi3_cnt1): New define_insn_and_split to implement a left
            rotate by one bit using an add.f followed by an adc.
            * config/arc/predicates.md (shiftr4_operator): Delete.

Diff:
---
 gcc/config/arc/arc-protos.h  |   7 +-
 gcc/config/arc/arc.cc        | 371 ++++++++++++++++++++++++++++++-------------
 gcc/config/arc/arc.md        | 230 +++++++--------------------
 gcc/config/arc/predicates.md |   9 --
 4 files changed, 324 insertions(+), 293 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 026ea99c9c64..a48d850bc648 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -25,7 +25,12 @@ extern machine_mode arc_select_cc_mode (enum rtx_code, rtx, rtx);
 extern struct rtx_def *gen_compare_reg (rtx, machine_mode);
 
 /* Declarations for various fns used in the .md file.  */
-extern const char *output_shift (rtx *);
+extern const char *output_shift_loop (enum rtx_code, rtx *);
+extern void arc_split_ashl (rtx *);
+extern void arc_split_ashr (rtx *);
+extern void arc_split_lshr (rtx *);
+extern void arc_split_rotl (rtx *);
+extern void arc_split_rotr (rtx *);
 extern bool compact_sda_memory_operand (rtx, machine_mode, bool);
 extern bool arc_double_limm_p (rtx);
 extern void arc_print_operand (FILE *, rtx, int);
diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index 00427d859cc4..353ac69de34d 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -241,7 +241,6 @@ static int branch_dest (rtx);
 static void  arc_output_pic_addr_const (FILE *,  rtx, int);
 static bool arc_function_ok_for_sibcall (tree, tree);
 static rtx arc_function_value (const_tree, const_tree, bool);
-const char * output_shift (rtx *);
 static void arc_reorg (void);
 static bool arc_in_small_data_p (const_tree);
 
@@ -4151,143 +4150,287 @@ arc_pre_reload_split (void)
 	  && !(cfun->curr_properties & PROP_rtl_split_insns));
 }
 
-/* Output the assembler code for doing a shift.
-   We go to a bit of trouble to generate efficient code as the ARC601 only has
-   single bit shifts.  This is taken from the h8300 port.  We only have one
-   mode of shifting and can't access individual bytes like the h8300 can, so
-   this is greatly simplified (at the expense of not generating hyper-
-   efficient code).
-
-   This function is not used if the variable shift insns are present.  */
-
-/* FIXME:  This probably can be done using a define_split in arc.md.
-   Alternately, generate rtx rather than output instructions.  */
+/* Output the assembler code for a zero-overhead loop doing a shift
+   or rotate.  We know OPERANDS[0] == OPERANDS[1], and the bit count
+   is OPERANDS[2].  */
 
 const char *
-output_shift (rtx *operands)
+output_shift_loop (enum rtx_code code, rtx *operands)
 {
-  /*  static int loopend_lab;*/
-  rtx shift = operands[3];
-  machine_mode mode = GET_MODE (shift);
-  enum rtx_code code = GET_CODE (shift);
-  const char *shift_one;
-
-  gcc_assert (mode == SImode);
-
-  switch (code)
-    {
-    case ASHIFT:   shift_one = "add %0,%1,%1"; break;
-    case ASHIFTRT: shift_one = "asr %0,%1"; break;
-    case LSHIFTRT: shift_one = "lsr %0,%1"; break;
-    default:       gcc_unreachable ();
-    }
+  bool twice_p = false;
+  gcc_assert (GET_MODE (operands[0]) == SImode);
 
   if (GET_CODE (operands[2]) != CONST_INT)
     {
-      output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
-      goto shiftloop;
+      output_asm_insn ("and.f\tlp_count,%2,0x1f", operands);
+      output_asm_insn ("lpnz\t2f", operands);
     }
   else
     {
-      int n;
+      int n = INTVAL (operands[2]) & 31;
+      if (!n)
+	{
+	  output_asm_insn ("mov\t%0,%1",operands);
+	  return "";
+	}
 
-      n = INTVAL (operands[2]);
+      if ((n & 1) == 0 && code != ROTATE)
+	{
+	  twice_p = true;
+	  n >>= 1;
+	}
+      operands[2] = GEN_INT (n);
+      output_asm_insn ("mov\tlp_count,%2", operands);
+      output_asm_insn ("lp\t2f", operands);
+    }
 
-      /* Only consider the lower 5 bits of the shift count.  */
-      n = n & 0x1f;
+  switch (code)
+    {
+    case ASHIFT:
+      output_asm_insn ("add\t%0,%1,%1", operands);
+      if (twice_p)
+	output_asm_insn ("add\t%0,%1,%1", operands);
+      break;
+    case ASHIFTRT:
+      output_asm_insn ("asr\t%0,%1", operands);
+      if (twice_p)
+	output_asm_insn ("asr\t%0,%1", operands);
+      break;
+    case LSHIFTRT:
+      output_asm_insn ("lsr\t%0,%1", operands);
+      if (twice_p)
+	output_asm_insn ("lsr\t%0,%1", operands);
+      break;
+    case ROTATERT:
+      output_asm_insn ("ror\t%0,%1", operands);
+      if (twice_p)
+	output_asm_insn ("ror\t%0,%1", operands);
+      break;
+    case ROTATE:
+      output_asm_insn ("add.f\t%0,%1,%1", operands);
+      output_asm_insn ("adc\t%0,%0,0", operands);
+      twice_p = true;
+      break;
+    default:
+      gcc_unreachable ();
+    }
 
-      /* First see if we can do them inline.  */
-      /* ??? We could get better scheduling & shorter code (using short insns)
-	 by using splitters.  Alas, that'd be even more verbose.  */
-      if (code == ASHIFT && n <= 9 && n > 2
-	  && dest_reg_operand (operands[4], SImode))
+  if (!twice_p)
+    output_asm_insn ("nop", operands);
+  fprintf (asm_out_file, "2:\t%s end single insn loop\n", ASM_COMMENT_START);
+  return "";
+}
+
+
+/* Split SImode left shift instruction.  */
+void
+arc_split_ashl (rtx *operands)
+{
+  if (CONST_INT_P (operands[2]))
+    {
+      int n = INTVAL (operands[2]) & 0x1f;
+      if (n <= 9)
 	{
-	  output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
-	  for (n -=3 ; n >= 3; n -= 3)
-	    output_asm_insn ("add3 %0,%4,%0", operands);
-	  if (n == 2)
-	    output_asm_insn ("add2 %0,%4,%0", operands);
-	  else if (n)
-	    output_asm_insn ("add %0,%0,%0", operands);
+	  if (n == 0)
+	    emit_move_insn (operands[0], operands[1]);
+	  else if (n <= 2)
+	    {
+	      emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[1]));
+	      if (n == 2)
+		emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[0]));
+	    }
+	  else
+	    {
+	      rtx zero = gen_reg_rtx (SImode);
+	      emit_move_insn (zero, const0_rtx);
+	      emit_insn (gen_add_shift (operands[0], operands[1],
+					GEN_INT (3), zero));
+	      for (n -= 3; n >= 3; n -= 3)
+		emit_insn (gen_add_shift (operands[0], operands[0],
+					  GEN_INT (3), zero));
+	      if (n == 2)
+		emit_insn (gen_add_shift (operands[0], operands[0],
+					  const2_rtx, zero));
+	      else if (n)
+		emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[0]));
+	    }
+	  return;
 	}
-      else if (n <= 4)
+      else if (n >= 29)
 	{
-	  while (--n >= 0)
+	  if (n < 31)
 	    {
-	      output_asm_insn (shift_one, operands);
-	      operands[1] = operands[0];
+	      if (n == 29)
+		{
+		  emit_insn (gen_andsi3_i (operands[0], operands[1],
+					   GEN_INT (7)));
+		  emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+		}
+	      else
+		emit_insn (gen_andsi3_i (operands[0], operands[1],
+					 GEN_INT (3)));
+	      emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
 	    }
+	  else
+	    emit_insn (gen_andsi3_i (operands[0], operands[1], const1_rtx));
+	  emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+	  return;
 	}
-      /* See if we can use a rotate/and.  */
-      else if (n == BITS_PER_WORD - 1)
+    }
+
+  emit_insn (gen_ashlsi3_loop (operands[0], operands[1], operands[2]));
+}
+
+/* Split SImode arithmetic right shift instruction.  */
+void
+arc_split_ashr (rtx *operands)
+{
+  if (CONST_INT_P (operands[2]))
+    {
+      int n = INTVAL (operands[2]) & 0x1f;
+      if (n <= 4)
 	{
-	  switch (code)
+	  if (n != 0)
 	    {
-	    case ASHIFT :
-	      output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
-	      break;
-	    case ASHIFTRT :
-	      /* The ARC doesn't have a rol insn.  Use something else.  */
-	      output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
-	      break;
-	    case LSHIFTRT :
-	      /* The ARC doesn't have a rol insn.  Use something else.  */
-	      output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
-	      break;
-	    default:
-	      break;
+	      emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[1]));
+	      while (--n > 0)
+		emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[0]));
+	    }
+	  else
+	    emit_move_insn (operands[0], operands[1]);
+	  return;
+	}
+      else if (n == 30)
+	{
+	  rtx tmp = gen_reg_rtx (SImode);
+	  emit_insn (gen_add_f (tmp, operands[1], operands[1]));
+	  emit_insn (gen_sbc (operands[0], operands[0], operands[0]));
+	  emit_insn (gen_addsi_compare_2 (tmp, tmp));
+	  emit_insn (gen_adc (operands[0], operands[0], operands[0]));
+	  return;
+	}
+      else if (n == 31)
+	{
+	  emit_insn (gen_addsi_compare_2 (operands[1], operands[1]));
+	  emit_insn (gen_sbc (operands[0], operands[0], operands[0]));
+	  return;
+	}
+    }
+
+  emit_insn (gen_ashrsi3_loop (operands[0], operands[1], operands[2]));
+}
+
+/* Split SImode logical right shift instruction.  */
+void
+arc_split_lshr (rtx *operands)
+{
+  if (CONST_INT_P (operands[2]))
+    {
+      int n = INTVAL (operands[2]) & 0x1f;
+      if (n <= 4)
+	{
+	  if (n != 0)
+	    {
+	      emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[1]));
+	      while (--n > 0)
+		emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[0]));
 	    }
+	  else
+	    emit_move_insn (operands[0], operands[1]);
+	  return;
 	}
-      else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
+      else if (n == 30)
 	{
-	  switch (code)
+	  rtx tmp = gen_reg_rtx (SImode);
+	  emit_insn (gen_add_f (tmp, operands[1], operands[1]));
+	  emit_insn (gen_scc_ltu_cc_c (operands[0]));
+	  emit_insn (gen_addsi_compare_2 (tmp, tmp));
+	  emit_insn (gen_adc (operands[0], operands[0], operands[0]));
+	  return;
+	}
+      else if (n == 31)
+	{
+	  emit_insn (gen_addsi_compare_2 (operands[1], operands[1]));
+	  emit_insn (gen_scc_ltu_cc_c (operands[0]));
+	  return;
+	}
+    }
+
+  emit_insn (gen_lshrsi3_loop (operands[0], operands[1], operands[2]));
+}
+
+/* Split SImode rotate left instruction.  */
+void
+arc_split_rotl (rtx *operands)
+{
+  if (CONST_INT_P (operands[2]))
+    {
+      int n = INTVAL (operands[2]) & 0x1f;
+      if (n <= 2)
+	{
+	  if (n != 0)
 	    {
-	    case ASHIFT :
-	      output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
-	      break;
-	    case ASHIFTRT :
-#if 1 /* Need some scheduling comparisons.  */
-	      output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
-			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
-#else
-	      output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
-			       "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
-#endif
-	      break;
-	    case LSHIFTRT :
-#if 1
-	      output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
-			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
-#else
-	      output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
-			       "and %0,%0,1\n\trlc %0,%0", operands);
-#endif
-	      break;
-	    default:
-	      break;
+	      emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[1]));
+	      if (n == 2)
+		emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[0]));
 	    }
+	  else
+	    emit_move_insn (operands[0], operands[1]);
+	  return;
 	}
-      else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
-	output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
-			 operands);
-      /* Must loop.  */
-      else
+      else if (n >= 28)
+	{
+	  emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[1]));
+	  while (++n < 32)
+	    emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
+      else if (n >= 16 || n == 12 || n == 14)
 	{
-	  operands[2] = GEN_INT (n);
-	  output_asm_insn ("mov.f lp_count, %2", operands);
+	  emit_insn (gen_rotrsi3_loop (operands[0], operands[1],
+				       GEN_INT (32 - n)));
+	  return;
+	}
+    }
 
-	shiftloop:
+  emit_insn (gen_rotlsi3_loop (operands[0], operands[1], operands[2]));
+}
+
+/* Split SImode rotate right (OPERANDS[0] = OPERANDS[1] ror OPERANDS[2]).  */
+void
+arc_split_rotr (rtx *operands)
+{
+  if (CONST_INT_P (operands[2]))
+    {
+      int n = INTVAL (operands[2]) & 0x1f;
+      if (n <= 4)  /* Short counts: a chain of single-bit right rotates.  */
+	{
+	  if (n != 0)
 	    {
-	      output_asm_insn ("lpnz\t2f", operands);
-	      output_asm_insn (shift_one, operands);
-	      output_asm_insn ("nop", operands);
-	      fprintf (asm_out_file, "2:\t%s end single insn loop\n",
-		       ASM_COMMENT_START);
+	      emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[1]));
+	      while (--n > 0)
+		emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
 	    }
+	  else
+	    emit_move_insn (operands[0], operands[1]);
+	  return;
+	}
+      else if (n >= 30)  /* Rotate right by 30/31 == rotate left by 2/1.  */
+	{
+	  emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[1]));
+	  if (n == 30)  /* Second left rotate, applied to the result.  */
+	    emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[0]));
+	  return;
+	}
+      else if (n >= 21 || n == 17 || n == 19)  /* Rotate left by 32-n.  */
+	{
+	  emit_insn (gen_rotlsi3_loop (operands[0], operands[1],
+				       GEN_INT (32 - n)));
+	  return;
 	}
     }
 
-  return "";
+  emit_insn (gen_rotrsi3_loop (operands[0], operands[1], operands[2]));
 }
 /* Nested function support.  */
@@ -4459,9 +4602,9 @@ arc_print_operand (FILE *file, rtx x, int code)
 
     case 'c':
       if (GET_CODE (x) == CONST_INT)
-        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) );
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) );
       else
-        output_operand_lossage ("invalid operands to %%c code");
+	output_operand_lossage ("invalid operands to %%c code");
 
       return;
 
@@ -5433,8 +5576,8 @@ arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
 
       if ((GET_CODE (XEXP (x, 0)) == ASHIFT
 	   && _1_2_3_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
-          || (GET_CODE (XEXP (x, 0)) == MULT
-              && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)))
+	  || (GET_CODE (XEXP (x, 0)) == MULT
+	      && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)))
 	{
 	  if (CONSTANT_P (XEXP (x, 1)) && !speed)
 	    *total += COSTS_N_INSNS (4);
@@ -5445,8 +5588,8 @@ arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
     case MINUS:
       if ((GET_CODE (XEXP (x, 1)) == ASHIFT
 	   && _1_2_3_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
-          || (GET_CODE (XEXP (x, 1)) == MULT
-              && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)))
+	  || (GET_CODE (XEXP (x, 1)) == MULT
+	      && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)))
 	{
 	  if (CONSTANT_P (XEXP (x, 0)) && !speed)
 	    *total += COSTS_N_INSNS (4);
@@ -7546,9 +7689,9 @@ hwloop_optimize (hwloop_info loop)
   if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg)) != LP_COUNT)
     {
       if (dump_file)
-        fprintf (dump_file, ";; loop %d doesn't use lp_count as loop"
+	fprintf (dump_file, ";; loop %d doesn't use lp_count as loop"
 		 " iterator\n",
-                 loop->loop_no);
+		 loop->loop_no);
       /* This loop doesn't use the lp_count, check though if we can
 	 fix it.  */
       if (TEST_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT)
@@ -7721,7 +7864,7 @@ hwloop_optimize (hwloop_info loop)
 		 /* Make sure we don't split a call and its corresponding
 		    CALL_ARG_LOCATION note.  */
 		 && NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION))
-        entry_after = NEXT_INSN (entry_after);
+	entry_after = NEXT_INSN (entry_after);
 #endif
       entry_after = next_nonnote_insn_bb (entry_after);
 
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 325e4f56b9bc..ee438872dd23 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -3353,22 +3353,16 @@ archs4x, archs4xd"
 
 ;; Shift instructions.
 
-(define_expand "ashlsi3"
-  [(set (match_operand:SI 0 "dest_reg_operand" "")
-	(ashift:SI (match_operand:SI 1 "register_operand" "")
-		   (match_operand:SI 2 "nonmemory_operand" "")))]
-  "")
+(define_code_iterator ANY_SHIFT_ROTATE [ashift ashiftrt lshiftrt
+					rotate rotatert])
 
-(define_expand "ashrsi3"
-  [(set (match_operand:SI 0 "dest_reg_operand" "")
-	(ashiftrt:SI (match_operand:SI 1 "register_operand" "")
-		     (match_operand:SI 2 "nonmemory_operand" "")))]
-  "")
+(define_code_attr insn [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr")
+			(rotate "rotl") (rotatert "rotr")])
 
-(define_expand "lshrsi3"
+(define_expand "<insn>si3"
   [(set (match_operand:SI 0 "dest_reg_operand" "")
-	(lshiftrt:SI (match_operand:SI 1 "register_operand" "")
-		     (match_operand:SI 2 "nonmemory_operand" "")))]
+	(ANY_SHIFT_ROTATE:SI (match_operand:SI 1 "register_operand" "")
+			     (match_operand:SI 2 "nonmemory_operand" "")))]
   "")
 
 ; asl, asr, lsr patterns:
@@ -3437,117 +3431,10 @@ archs4x, archs4xd"
   [(set_attr "type" "shift")
    (set_attr "length" "8")])
 
-(define_insn_and_split "*ashlsi3_nobs"
-  [(set (match_operand:SI 0 "dest_reg_operand")
-	(ashift:SI (match_operand:SI 1 "register_operand")
-		   (match_operand:SI 2 "nonmemory_operand")))]
-  "!TARGET_BARREL_SHIFTER
-   && operands[2] != const1_rtx
-   && arc_pre_reload_split ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  if (CONST_INT_P (operands[2]))
-    {
-      int n = INTVAL (operands[2]) & 0x1f;
-      if (n <= 9)
-	{
-	  if (n == 0)
-	    emit_move_insn (operands[0], operands[1]);
-	  else if (n <= 2)
-	    {
-	      emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[1]));
-	      if (n == 2)
-		emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[0]));
-	    }
-	  else
-	    {
-	      rtx zero = gen_reg_rtx (SImode);
-	      emit_move_insn (zero, const0_rtx);
-	      emit_insn (gen_add_shift (operands[0], operands[1],
-					GEN_INT (3), zero));
-	      for (n -= 3; n >= 3; n -= 3)
-		emit_insn (gen_add_shift (operands[0], operands[0],
-					  GEN_INT (3), zero));
-	      if (n == 2)
-		emit_insn (gen_add_shift (operands[0], operands[0],
-					  const2_rtx, zero));
-	      else if (n)
-		emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[0]));
-	    }
-	  DONE;
-	}
-      else if (n >= 29)
-	{
-	  if (n < 31)
-	    {
-	      if (n == 29)
-		{
-		  emit_insn (gen_andsi3_i (operands[0], operands[1],
-					   GEN_INT (7)));
-		  emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
-		}
-	      else
-		emit_insn (gen_andsi3_i (operands[0], operands[1],
-					 GEN_INT (3)));
-	      emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
-	    }
-	  else
-	    emit_insn (gen_andsi3_i (operands[0], operands[1], const1_rtx));
-	  emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
-	  DONE;
-	}
-    }
-
-  rtx shift = gen_rtx_fmt_ee (ASHIFT, SImode, operands[1], operands[2]);
-  emit_insn (gen_shift_si3_loop (operands[0], operands[1],
-				 operands[2], shift));
-  DONE;
-})
-
-(define_insn_and_split "*ashlri3_nobs"
-  [(set (match_operand:SI 0 "dest_reg_operand")
-	(ashiftrt:SI (match_operand:SI 1 "register_operand")
-		     (match_operand:SI 2 "nonmemory_operand")))]
-  "!TARGET_BARREL_SHIFTER
-   && operands[2] != const1_rtx
-   && arc_pre_reload_split ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  if (CONST_INT_P (operands[2]))
-    {
-      int n = INTVAL (operands[2]) & 0x1f;
-      if (n <= 4)
-	{
-	  if (n != 0)
-	    {
-	      emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[1]));
-	      while (--n > 0)
-		emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[0]));
-	    }
-	  else 
-	    emit_move_insn (operands[0], operands[1]);
-	  DONE;
-	}
-    }
-
-  rtx pat;
-  rtx shift = gen_rtx_fmt_ee (ASHIFTRT, SImode, operands[1], operands[2]);
-  if (shiftr4_operator (shift, SImode))
-    pat = gen_shift_si3 (operands[0], operands[1], operands[2], shift);
-  else
-    pat = gen_shift_si3_loop (operands[0], operands[1], operands[2], shift);
-  emit_insn (pat);
-  DONE;
-})
-
-(define_insn_and_split "*lshrsi3_nobs"
+(define_insn_and_split "*<insn>si3_nobs"
   [(set (match_operand:SI 0 "dest_reg_operand")
-	(lshiftrt:SI (match_operand:SI 1 "register_operand")
-		     (match_operand:SI 2 "nonmemory_operand")))]
+	(ANY_SHIFT_ROTATE:SI (match_operand:SI 1 "register_operand")
+			     (match_operand:SI 2 "nonmemory_operand")))]
   "!TARGET_BARREL_SHIFTER
    && operands[2] != const1_rtx
    && arc_pre_reload_split ()"
@@ -3555,66 +3442,28 @@ archs4x, archs4xd"
   "&& 1"
   [(const_int 0)]
 {
-  if (CONST_INT_P (operands[2]))
-    {
-      int n = INTVAL (operands[2]) & 0x1f;
-      if (n <= 4)
-	{
-	  if (n != 0)
-	    {
-	      emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[1]));
-	      while (--n > 0)
-		emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[0]));
-	    }
-	  else 
-	    emit_move_insn (operands[0], operands[1]);
-	  DONE;
-	}
-    }
-
-  rtx pat;
-  rtx shift = gen_rtx_fmt_ee (LSHIFTRT, SImode, operands[1], operands[2]);
-  if (shiftr4_operator (shift, SImode))
-    pat = gen_shift_si3 (operands[0], operands[1], operands[2], shift);
-  else
-    pat = gen_shift_si3_loop (operands[0], operands[1], operands[2], shift);
-  emit_insn (pat);
+  arc_split_<insn> (operands);
   DONE;
 })
 
-;; shift_si3 appears after {ashr,lshr}si3_nobs
-(define_insn "shift_si3"
-  [(set (match_operand:SI 0 "dest_reg_operand" "=r")
-	(match_operator:SI 3 "shiftr4_operator"
-			   [(match_operand:SI 1 "register_operand" "0")
-			    (match_operand:SI 2 "const_int_operand" "n")]))
-   (clobber (match_scratch:SI 4 "=&r"))
-   (clobber (reg:CC CC_REG))
-  ]
-  "!TARGET_BARREL_SHIFTER
-   && operands[2] != const1_rtx"
-  "* return output_shift (operands);"
-  [(set_attr "type" "shift")
-   (set_attr "length" "16")])
-
-;; shift_si3_loop appears after {ashl,ashr,lshr}si3_nobs
-(define_insn "shift_si3_loop"
+;; <ANY_SHIFT_ROTATE>si3_loop appears after <ANY_SHIFT_ROTATE>si3_nobs
+(define_insn "<insn>si3_loop"
   [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
-	(match_operator:SI 3 "shift_operator"
-			   [(match_operand:SI 1 "register_operand" "0,0")
-			    (match_operand:SI 2 "nonmemory_operand" "rn,Cal")]))
+	(ANY_SHIFT_ROTATE:SI 
+	  (match_operand:SI 1 "register_operand" "0,0")
+	  (match_operand:SI 2 "nonmemory_operand" "rn,Cal")))
    (clobber (reg:SI LP_COUNT))
    (clobber (reg:CC CC_REG))
   ]
   "!TARGET_BARREL_SHIFTER
    && operands[2] != const1_rtx"
-  "* return output_shift (operands);"
+  "* return output_shift_loop (<CODE>, operands);"
   [(set_attr "type" "shift")
    (set_attr "length" "16,20")])
 
 ;; Rotate instructions.
 
-(define_insn "rotrsi3"
+(define_insn "rotrsi3_insn"
   [(set (match_operand:SI 0 "dest_reg_operand"                    "=r, r,   r")
 	(rotatert:SI (match_operand:SI 1 "arc_nonmemory_operand"  " 0,rL,rCsz")
 		     (match_operand:SI 2 "nonmemory_operand"      "rL,rL,rCal")))]
@@ -3624,6 +3473,35 @@ archs4x, archs4xd"
    (set_attr "predicable" "yes,no,no")
    (set_attr "length" "4,4,8")])
 
+(define_insn_and_split "*rotlsi3"
+  [(set (match_operand:SI 0 "dest_reg_operand")
+	(rotate:SI (match_operand:SI 1 "register_operand")
+		   (match_operand:SI 2 "nonmemory_operand")))]
+  "TARGET_BARREL_SHIFTER
+   && arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (rotatert:SI (match_dup 1) (match_dup 3)))]
+{
+  if (CONST_INT_P (operands[2]))
+    {
+      int n = INTVAL (operands[2]) & 31;
+      if (n == 0)
+	{
+	  emit_move_insn (operands[0], operands[1]);
+	  DONE;
+	}
+      else operands[3] = GEN_INT (32 - n);
+    }
+  else
+    {
+      if (!register_operand (operands[2], SImode))
+	operands[2] = force_reg (SImode, operands[2]);
+      operands[3] = gen_reg_rtx (SImode);
+      emit_insn (gen_negsi2 (operands[3], operands[2]));
+    }
+})
+
 ;; Compare / branch instructions.
 
 (define_expand "cbranchsi4"
@@ -5995,6 +5873,20 @@ archs4x, archs4xd"
 		   (zero_extract:SI (match_dup 1) (match_dup 5) (match_dup 7)))])
    (match_dup 1)])
 
+(define_insn_and_split "rotlsi3_cnt1"
+  [(set (match_operand:SI 0 "dest_reg_operand"            "=r")
+	(rotate:SI (match_operand:SI 1 "register_operand" "r")
+		   (const_int 1)))]
+  "!TARGET_BARREL_SHIFTER"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_add_f (operands[0], operands[1], operands[1]));
+  emit_insn (gen_adc (operands[0], operands[0], const0_rtx));
+  DONE;
+})
+
 (define_insn "rotrsi3_cnt1"
   [(set (match_operand:SI 0 "dest_reg_operand"              "=r")
 	(rotatert:SI (match_operand:SI 1 "nonmemory_operand" "rL")
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index e0aef86fd247..607075038ec1 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -542,15 +542,6 @@
   (match_code "ashiftrt, lshiftrt, ashift")
 )
 
-;; Return true if OP is a right shift operator that can be implemented in
-;; four insn words or less without a barrel shifter or multiplier.
-(define_predicate "shiftr4_operator"
-  (and (match_code "ashiftrt, lshiftrt")
-       (match_test "const_int_operand (XEXP (op, 1), VOIDmode) ")
-       (match_test "UINTVAL (XEXP (op, 1)) <= 4U
-		    || INTVAL (XEXP (op, 1)) == 30
-		    || INTVAL (XEXP (op, 1)) == 31")))
-
 (define_predicate "mult_operator"
     (and (match_code "mult") (match_test "TARGET_MPY"))
 )

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-24 15:44 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-24 15:44 [gcc r14-4892] ARC: Improved SImode shifts and rotates on !TARGET_BARREL_SHIFTER Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).