public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH SH2A]: Add movml instruction
@ 2010-05-07  7:44 Naveen H. S
  2010-05-08  0:14 ` Kaz Kojima
  0 siblings, 1 reply; 10+ messages in thread
From: Naveen H. S @ 2010-05-07  7:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: kkojima, Prafulla Thakare

[-- Attachment #1: Type: text/plain, Size: 1191 bytes --]

Hi,

Please find attached the patch "sh2a-movml.patch", which implements the
"movml" instruction for the SH2A target. This instruction saves a number of
consecutive registers to memory, or restores a number of consecutive
registers from memory.

Please review the patch and let me know if there should be any modifications 
in it.

Regression testing was done for sh2a and its variants.

ChangeLog
2010-05-07  Naveen H.S  <naveenh1@kpitcummins.com>
		Jayant Sonar  <jayants2@kpitcummins.com>

	* config/sh/sh.c (sh_override_options): Do not allow 
	flag_schedule_insns_after_reload for SH2A target.
	(print_operand): "s", New print_operand for movml instruction.
	(push_regs): Check whether multiple registers can be pushed using
	"movml" instruction for SH2A.
	(sh2a_gen_push_multiple): New function to push multiple registers
	for SH2A.
	(sh2a_gen_pop_multiple): New function to pop multiple registers
	for SH2A.
	(sh_expand_epilogue): Check whether multiple registers can be popped
	using "movml" instruction for SH2A.
	* config/sh/sh.md (UNSPEC_HIGH_REG): New constant.
	(movml_push): New instruction.
	(movml_pop): New instruction.


Regards,
Naveen.H.S
www.kpitgnutools.com

[-- Attachment #2: sh2a-movml.patch --]
[-- Type: application/octet-stream, Size: 9752 bytes --]

--- a/gcc/config/sh.orig/sh.c	2010-03-01 04:53:50.000000000 +0530
+++ b/gcc/config/sh/sh.c	2010-05-07 13:32:08.000000000 +0530
@@ -881,6 +881,10 @@ sh_override_options (void)
 	 PIC, SH3 and lower as they give spill failures for R0.  */
       if (!TARGET_HARD_SH4 || flag_pic)
         flag_schedule_insns = 0;
+      /* Do not run scheduling after reload for sh2a, as it schedules
+         registers after "movml" instruction in the prologue.  */
+      if (TARGET_SH2A)
+        flag_schedule_insns_after_reload = 0;
       /* ??? Current exception handling places basic block boundaries
 	 after call_insns.  It causes the high pressure on R0 and gives
 	 spill failures for R0 in reload.  See PR 22553 and the thread
@@ -1029,6 +1033,7 @@ print_operand_address (FILE *stream, rtx
    'U'  Likewise for {LD,ST}{HI,LO}.
    'V'  print the position of a single bit set.
    'W'  print the position of a single bit cleared.
+   's'  print the operand address.
    't'  print a memory address which is a register.
    'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
    'o'  output an operator.  */
@@ -1170,6 +1175,10 @@ print_operand (FILE *stream, rtx x, int 
 	}
       break;
 
+    case 's':
+      print_operand_address (stream, x);
+      break;
+
     case 't':
       gcc_assert (MEM_P (x));
       x = XEXP (x, 0);
@@ -6318,8 +6327,9 @@ pop (int rn)
 static void
 push_regs (HARD_REG_SET *mask, int interrupt_handler)
 {
-  int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
+  int i = (interrupt_handler && !TARGET_SH2A)? LAST_BANKED_REG + 1 : 0;
   int skip_fpscr = 0;
+  int call_no=0;
 
   /* Push PR last; this gives better latencies after the prologue, and
      candidates for the return delay slot when there are no general
@@ -6342,20 +6352,27 @@ push_regs (HARD_REG_SET *mask, int inter
       if (i != PR_REG
 	  && (i != FPSCR_REG || ! skip_fpscr)
 	  && TEST_HARD_REG_BIT (*mask, i))
-           {
+        {
   	/* If the ISR has RESBANK attribute assigned, don't push any of
    	   the following registers - R0-R14, MACH, MACL and GBR.  */
-      if (! (sh_cfun_resbank_handler_p ()
-	     && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
-		 || i == MACH_REG
-		 || i == MACL_REG
-		 || i == GBR_REG)))
-	  push (i);
+	  if (! (sh_cfun_resbank_handler_p ()
+		 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
+		      || i == MACH_REG
+		      || i == MACL_REG
+		      || i == GBR_REG)))
+            {
+	    /* For sh2a, check out whether "movml" instruction can be 
+	       generated for pushing registers on to stack.  */
+              if (TARGET_SH2A)
+                sh2a_gen_push_multiple (i, call_no++, mask, skip_fpscr);
+              else
+                push (i);
+            }
   	}
     }
 
   /* Push banked registers last to improve delay slot opportunities.  */
-  if (interrupt_handler)
+  if (interrupt_handler && !TARGET_SH2A)
     for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
       if (TEST_HARD_REG_BIT (*mask, i))
 	push (i);
@@ -6365,6 +6382,130 @@ push_regs (HARD_REG_SET *mask, int inter
     push (PR_REG);
 }
 
+/* Generate code to push multiple registers using "movml" instruction for
+   SH2A target.  */
+
+int
+sh2a_gen_push_multiple (int reg, int call_no, HARD_REG_SET * regmask,
+                        int reg_skip_fpscr)
+{
+  static int movml_flag = 0, next_reg = 0;
+  
+  /* Check out whether the multiple registers can be pushed or individual
+     registers have to be pushed.  */
+  if (reg != PR_REG
+      && (reg != FPSCR_REG || !reg_skip_fpscr)
+      && TEST_HARD_REG_BIT (*regmask, reg))
+    {
+      if (call_no == 0)
+        {
+          /* Check out whether the first register pushed is R0.  */
+          if (reg == R0_REG)
+            movml_flag = 1;
+          next_reg = R0_REG;
+        }
+      if (movml_flag && (reg >= FIRST_GENERAL_REG && reg < LAST_GENERAL_REG))
+        {
+          if (next_reg == reg && reg < FP_REG)
+            {
+              next_reg = reg + 1;
+              movml_flag = 1;
+            }
+          /* Generate movml instruction if multiple registers are sequentially
+	      pushed on to stack with R0 as the first register.  */
+          else
+            {
+              if (next_reg != FP_REG)
+                next_reg = next_reg - 1;
+
+              if (next_reg == R0_REG)
+                push (R0_REG);
+              else
+                {
+                  emit_insn (gen_movml_push (GEN_INT (next_reg)));
+                  emit_insn (gen_rtx_USE
+                             (VOIDmode, gen_rtx_REG (SImode, SP_REG)));
+                  movml_flag = 0;
+                  next_reg = 0;
+                  push (reg);
+                  return 0;
+                }
+              movml_flag = 0;
+              next_reg = 0;
+            }
+        }
+      if (!movml_flag)
+        push (reg);
+    }
+
+  else if (movml_flag && (next_reg > 0))
+    {
+      next_reg -= 1;
+      emit_insn (gen_movml_push (GEN_INT (next_reg)));
+      emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, SP_REG)));
+      next_reg = 0;
+      movml_flag = 0;
+    }
+  return 0;
+}
+
+/* Generate code to pop multiple registers using "movml" instruction for
+   SH2A target.  */
+
+int
+sh2a_gen_pop_multiple (int reg, int call_no, HARD_REG_SET live_regmask)
+{
+  static int next_reg, count;
+  /* Check out whether the multiple registers can be popped or individual
+     registers have to be pushed.  */
+  if (reg != PR_REG && TEST_HARD_REG_BIT (live_regmask, reg))
+    {
+      if ((reg >= FIRST_GENERAL_REG) && (reg < LAST_GENERAL_REG))
+        {
+          if (call_no == 0)
+            {
+              next_reg = reg;
+              count = 0;
+            }
+          if (next_reg == reg && reg != R0_REG)
+            {
+              next_reg = reg - 1;
+              count++;
+            }
+          else if (reg != R0_REG)
+            {
+              while (count > 0)
+                {
+                  pop (next_reg + count);
+                  count--;
+                }
+              return (sh2a_gen_pop_multiple (reg, 0, live_regmask));
+            }
+          /* Generate movml instruction if multiple registers are sequentially
+             pushed on to stack with R0 as the first register.  */
+          else if (reg == R0_REG && count > 0)
+            {
+              emit_insn (gen_movml_pop (GEN_INT (next_reg), GEN_INT (count)));
+              emit_insn (gen_rtx_USE
+                         (VOIDmode, gen_rtx_REG (SImode, SP_REG)));
+              count = 0;
+              return 0;
+            }
+        }
+      else
+        pop (reg);
+    }
+  else if (count > 0)
+    {
+      while (count > 0)
+        {
+          pop (next_reg + count);
+          count--;
+        }
+    }
+  return 0;
+}
+
 /* Calculate how much extra space is needed to save all callee-saved
    target registers.
    LIVE_REGS_MASK is the register mask calculated by calc_live_regs.  */
@@ -7278,6 +7419,7 @@ sh_expand_epilogue (bool sibcall_p)
   else /* ! TARGET_SH5 */
     {
       int last_reg;
+      int call_no=0;
 
       save_size = 0;
 	/* For an ISR with RESBANK attribute assigned, don't pop PR
@@ -7292,7 +7434,7 @@ sh_expand_epilogue (bool sibcall_p)
 
       /* Banked registers are popped first to avoid being scheduled in the
 	 delay slot. RTE switches banks before the ds instruction.  */
-      if (current_function_interrupt)
+      if (current_function_interrupt && !TARGET_SH2A)
 	{
 	  for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
 	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
@@ -7311,6 +7453,12 @@ sh_expand_epilogue (bool sibcall_p)
 	      && hard_reg_set_intersect_p (live_regs_mask,
 					  reg_class_contents[DF_REGS]))
 	    fpscr_deferred = 1;
+
+	  /*  For sh2a, check out whether "movml" instruction can be
+	      generated for poping registers on to stack.  */
+	  else if (TARGET_SH2A)
+	    sh2a_gen_pop_multiple (j, call_no++, live_regs_mask);
+
 	  /* For an ISR with RESBANK attribute assigned, don't pop
 	     following registers, R0-R14, MACH, MACL and GBR.  */
 	  else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j) 
@@ -7321,7 +7469,6 @@ sh_expand_epilogue (bool sibcall_p)
 			      || j == MACL_REG
 			      || j == GBR_REG)))
 	    pop (j);
-
 	  if (j == FIRST_FP_REG && fpscr_deferred)
 	    pop (FPSCR_REG);
 	}
--- a/gcc/config/sh.orig/sh.md	2009-11-22 04:21:07.000000000 +0530
+++ b/gcc/config/sh/sh.md	2010-05-07 13:32:47.000000000 +0530
@@ -163,6 +163,7 @@
 
   ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
   (UNSPEC_PCREL_SYMOFF	46)
+  (UNSPEC_HIGH_REG      47)
 
   ;; These are used with unspec_volatile.
   (UNSPECV_BLOCKAGE	0)
@@ -4735,6 +4736,26 @@ label:
 ;; We can't use push and pop on SHcompact because the stack must always
 ;; be 8-byte aligned.
 
+;; Generate movml instruction for SH2A target; if the registers are pushed
+;; sequentially on to stack with R0 as starting register
+(define_insn "movml_push"
+  [(unspec [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_HIGH_REG)]
+  "TARGET_SH2A"
+  "movml.l\tr%s0,@-r15"
+  [(set_attr "in_delay_slot" "no")]
+)
+
+;; Generate movml instruction for SH2A target; if the registers are popped
+;; sequentially from stack with R0 as final register
+(define_insn "movml_pop"
+  [(set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) 
+			  (match_operand:SI 0 "immediate_operand" "i")))
+   (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_HIGH_REG)]
+  "TARGET_SH2A"
+  "movml.l\t@r15+,r%s1"
+  [(set_attr "in_delay_slot" "no")]
+)
+
 (define_expand "push"
   [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
 	(match_operand:SI 0 "register_operand" "r,l,x"))]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH SH2A]: Add movml instruction
  2010-05-07  7:44 [PATCH SH2A]: Add movml instruction Naveen H. S
@ 2010-05-08  0:14 ` Kaz Kojima
  2010-08-13  6:46   ` Naveen H. S
  0 siblings, 1 reply; 10+ messages in thread
From: Kaz Kojima @ 2010-05-08  0:14 UTC (permalink / raw)
  To: Naveen.S; +Cc: gcc-patches, Prafulla.Thakare

"Naveen H. S" <Naveen.S@kpitcummins.com> wrote:
> Please find attached the patch "sh2a-movml.patch that implements "movml" 
> instruction for SH2A target. This instruction saves a number of consecutive 
> registers to memory, or restore a number of consecutive registers from 
> memory. 
> 
> Please review the patch and let me know if there should be any modifications 
> in it.
> 
> Regression done for sh2a and variants.
> 
> ChangeLog
> 2010-05-07  Naveen H.S  <naveenh1@kpitcummins.com>
> 		Jayant Sonar  <jayants2@kpitcummins.com>
> 
> 	* config/sh/sh.c (sh_override_options): Do not allow 
> 	flag_schedule_insns_after_reload for SH2A target.
> 	(print_operand): "s", New print_operand for movml instruction.
> 	(push_regs): Check whether multiple registers can be pushed using
> 	"movml" instruction for SH2A.
> 	(sh2a_gen_push_multiple): New function to push multiple registers
> 	for SH2A.
> 	(sh2a_gen_pop_multiple): New function to pop multiple registers
> 	for SH2A.
> 	(sh_expand_epilogue): Check whether multiple registers can be popped
> 	using "movml" instruction for SH2A.
> 	* config/sh/sh.md (UNSPEC_HIGH_REG): New constant.
> 	(movml_push): New instruction.
> 	(movml_pop): New instruction.

With your patch, movml can be generated only when R0-Rm are
pushed/popped in the prologue/epilogue.
How often does that happen in real programs?  It looks very
unusual because R0-R7 are call-used registers.  The only
possible examples I can imagine are interrupt handlers.
If it's for such a limited case, it does not look worth the effort
to use movml in the generic prologue/epilogue in the first place.

Also, it seems too bad to disable insn-scheduling for sh2a altogether.
Usually, appropriate blockage insns are used to disable insn
scheduling partially.  But the root problem would be that your
movml_push/movml_pop insns do not carry correct information about
the registers they use.  It would be far better to define correct RTL
for the movml insns.  For example, arm's implementation of the
ldmsi_postincN insns may help.

Perhaps, movml peepholes like arm ldm/stm peepholes

  http://gcc.gnu.org/ml/gcc-patches/2010-04/msg01231.html

instead of movml push/pop insns in the prologue/epilogue
would be a bit more fruitful.

Regards,
	kaz

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH SH2A]: Add movml instruction
  2010-05-08  0:14 ` Kaz Kojima
@ 2010-08-13  6:46   ` Naveen H. S
  2010-08-14  8:06     ` Kaz Kojima
  0 siblings, 1 reply; 10+ messages in thread
From: Naveen H. S @ 2010-08-13  6:46 UTC (permalink / raw)
  To: Kaz Kojima; +Cc: gcc-patches, Prafulla Thakare

[-- Attachment #1: Type: text/plain, Size: 2022 bytes --]

Hi Kaz-san,

Thanks for reviewing the patch.

>> It would be far better to define correct RTLs for movml insns.  
>> Perhaps arm's implementation of ldmsi_postincN insns helps, for
>> example.

As suggested, the "load_multiple" and "store_multiple" expand patterns were
defined, with RTL that was intended to generate the movml and movmu
instructions. However, movml and movmu are a very specific kind of multiple
register transfer: they are related to the push and pop patterns and hence
work only with R15 (the stack pointer register). As a result, this RTL did
not generate movml and movmu instructions.

>> Perhaps, movml peepholes like arm ldm/stm peepholes
>> instead of movml push/pop insns in the prologue/epilogue would be a
>> bit more fruitful

Peepholes are implemented to generate these multiple register transfer
patterns. However, push required register transfers from rn to r0 and 
pop required it to be from register r0 to rn. Hence, register push and 
pop sequence were modified in prologue and epilogue to meet these 
requirements. Please let me know whether the reversed sequence of push
and pop of registers would have any adverse effect on the toolchain.
  
The peepholes generated movml and movmu instructions as expected for the
sh2a target. Regression tests were run on all sh2a variants for the sh-elf
toolchain and there was one failure, in "gcc.dg/attr-isr.c", which checks
for the push pattern. As a movml instruction is generated instead of the
individual push patterns, the test FAILs. It seems to be an expected failure.

Please review the attached "sh2a_movml_movmu.patch" and let me know if
there should be any modifications in it.

ChangeLog
2010-08-13  Naveen H.S  <naveen.S@kpitcummins.com>

	* config/sh/sh.c (push_regs): Modify the sequence of register push
	to suit movml and movmu instruction requirements.
	(expand_epilogue): Likewise.
	* config/sh/sh.md: New peephole patterns to generate movml and movmu
	instructions.

Thanks & Regards,
Naveen


[-- Attachment #2: sh2a_movml_movmu.patch --]
[-- Type: application/octet-stream, Size: 9774 bytes --]

--- orig/gcc/config/sh/sh.c	2010-07-15 20:01:28.000000000 +0530
+++ mod/gcc/config/sh/sh.c	2010-08-13 11:18:57.000000000 +0530
@@ -6369,13 +6369,15 @@ pop (int rn)
 static void
 push_regs (HARD_REG_SET *mask, int interrupt_handler)
 {
-  int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
+  int i = FIRST_PSEUDO_REGISTER;
+  int first_reg = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
   int skip_fpscr = 0;
 
-  /* Push PR last; this gives better latencies after the prologue, and
-     candidates for the return delay slot when there are no general
-     registers pushed.  */
-  for (; i < FIRST_PSEUDO_REGISTER; i++)
+  /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
+  if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
+    push (PR_REG);
+
+  for (; i >= first_reg; i--)
     {
       /* If this is an interrupt handler, and the SZ bit varies,
 	 and we have to push any floating point register, we need
@@ -6407,13 +6409,9 @@ push_regs (HARD_REG_SET *mask, int inter
 
   /* Push banked registers last to improve delay slot opportunities.  */
   if (interrupt_handler)
-    for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+    for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
       if (TEST_HARD_REG_BIT (*mask, i))
 	push (i);
-
-  /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
-  if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
-    push (PR_REG);
 }
 
 /* Calculate how much extra space is needed to save all callee-saved
@@ -7330,35 +7328,24 @@ sh_expand_epilogue (bool sibcall_p)
     }
   else /* ! TARGET_SH5 */
     {
-      int last_reg;
+      int last_reg, interrupt_handler;
 
+      interrupt_handler = current_function_interrupt;
       save_size = 0;
-	/* For an ISR with RESBANK attribute assigned, don't pop PR
-	   register.  */
-      if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
-	  && !sh_cfun_resbank_handler_p ())	
-	{
-	  if (!frame_pointer_needed)
-	    emit_insn (gen_blockage ());
-	  pop (PR_REG);
-	}
 
       /* Banked registers are popped first to avoid being scheduled in the
 	 delay slot. RTE switches banks before the ds instruction.  */
       if (current_function_interrupt)
 	{
-	  for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
 	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
 	      pop (i);
-
-	  last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
 	}
-      else
-	last_reg = FIRST_PSEUDO_REGISTER;
-
-      for (i = 0; i < last_reg; i++)
+	
+      i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
+      for (; i < FIRST_PSEUDO_REGISTER; i++)
 	{
-	  int j = (FIRST_PSEUDO_REGISTER - 1) - i;
+	  int j = i;
 
 	  if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
 	      && hard_reg_set_intersect_p (live_regs_mask,
@@ -7378,7 +7365,18 @@ sh_expand_epilogue (bool sibcall_p)
 	  if (j == FIRST_FP_REG && fpscr_deferred)
 	    pop (FPSCR_REG);
 	}
+
+        /* For an ISR with RESBANK attribute assigned, don't pop PR
+           register.  */
+      if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
+          && !sh_cfun_resbank_handler_p ())     
+        {
+          if (!frame_pointer_needed)
+            emit_insn (gen_blockage ());
+          pop (PR_REG);
+        }
     }
+
   if (target_flags != save_flags && ! current_function_interrupt)
     emit_insn (gen_toggle_sz ());
   target_flags = save_flags;
--- orig/gcc/config/sh/sh.md	2010-07-13 16:11:15.000000000 +0530
+++ mod/gcc/config/sh/sh.md	2010-08-13 11:20:20.000000000 +0530
@@ -6790,7 +6790,152 @@ label:
   "TARGET_SH1"
   [(set (match_dup 0) (match_dup 1))]
   "")
-\f
+
+;; peephole patterns to generate multiple register transfer instructions
+;; (movml and movmu) for sh2a target.
+
+(define_peephole
+  [(set (reg:SI R0_REG)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "register_operand" "r"))))
+   (set (reg:SI R1_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (reg:SI R2_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (reg:SI R3_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (reg:SI R4_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (reg:SI R5_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (reg:SI R6_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (reg:SI R7_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\t@r15+,r7")
+
+(define_peephole
+  [(set (mem:SI (pre_dec:SI (match_operand:SI 0 "register_operand" "r")))
+        (reg:SI R7_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R6_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R5_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R4_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R3_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R2_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R1_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R0_REG))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\tr7,@-r15")
+
+(define_peephole
+  [(set (reg:SI R0_REG)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "register_operand" "r"))))
+   (set (reg:SI R1_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (reg:SI R2_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\t@r15+,r2")
+
+(define_peephole
+  [(set (mem:SI (pre_dec:SI (match_operand:SI 0 "register_operand" "r")))
+        (reg:SI R2_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R1_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R0_REG))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\tr2,@-r15")
+
+(define_peephole
+  [(set (reg:SI R0_REG)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "register_operand" "r"))))
+   (set (reg:SI R1_REG)
+        (mem:SI (post_inc:SI (match_dup 0))))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\t@r15+,r1")
+
+(define_peephole
+  [(set (mem:SI (pre_dec:SI (match_operand:SI 0 "register_operand" "r")))
+        (reg:SI R1_REG))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (reg:SI R0_REG))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\tr1,@-r15")
+
+(define_peephole
+  [(set (match_operand:SI 1 "register_operand" "")
+        (mem:SI (post_inc:SI (match_operand:SI 0 "register_operand" ""))))
+   (set (match_operand:SI 2 "register_operand" "")
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (match_operand:SI 3 "register_operand" "")
+       (mem:SI (post_inc:SI (match_dup 0))))
+   (set (match_operand:SI 4 "register_operand" "")
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (match_operand:SI 5 "register_operand" "")
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (match_operand:SI 6 "register_operand" "")
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (set (match_operand:SI 7 "register_operand" "")
+        (mem:SI (post_inc:SI (match_dup 0))))
+   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
+   (set (match_operand:SI 8 "register_operand" "")
+        (mem:SI (post_inc:SI (match_dup 0))))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15
+   && REGNO (operands[1]) == 8 && REGNO (operands[2]) == 9
+   && REGNO (operands[3]) == 10 && REGNO (operands[4]) == 11
+   && REGNO (operands[5]) == PIC_REG && REGNO (operands[6]) == 13
+   && REGNO (operands[7]) == FP_REG && REGNO (operands[8]) == PR_REG"
+  "movmu.l\t@r15+,r8")
+
+(define_peephole
+  [(set (mem:SI (pre_dec:SI (match_operand:SI 0 "register_operand" "r")))
+        (match_operand:SI 1 "register_operand" ""))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (match_operand:SI 2 "register_operand" ""))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (match_operand:SI 3 "register_operand" ""))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (match_operand:SI 4 "register_operand" ""))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (match_operand:SI 5 "register_operand" ""))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (match_operand:SI 6 "register_operand" ""))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (match_operand:SI 7 "register_operand" ""))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (match_operand:SI 8 "register_operand" ""))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15
+   && REGNO (operands[8]) == 8 && REGNO (operands[7]) == 9
+   && REGNO (operands[6]) == 10 && REGNO (operands[5]) == 11
+   && REGNO (operands[4]) == PIC_REG && REGNO (operands[3]) == 13
+   && REGNO (operands[2]) == FP_REG && REGNO (operands[1]) == PR_REG"
+  "movmu.l\tr8,@-r15")
+
+(define_peephole
+  [(set (mem:SI (pre_dec:SI (match_operand:SI 0 "register_operand" "r")))
+        (match_operand:SI 1 "register_operand" ""))
+   (set (mem:SI (pre_dec:SI (match_dup 0)))
+        (match_operand:SI 2 "register_operand" ""))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15
+   && REGNO (operands[1]) == PR_REG && REGNO (operands[2]) == FP_REG"
+  "movmu.l\tr14,@-r15")   
+
+(define_peephole
+  [(set (match_operand:SI 1 "register_operand" "")
+        (mem:SI (post_inc:SI (match_operand:SI 0 "register_operand" ""))))
+   (set (match_operand:SI 2 "register_operand" "")
+        (mem:SI (post_inc:SI (match_dup 0))))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15
+   && REGNO (operands[1]) == FP_REG && REGNO (operands[2]) == PR_REG"
+  "movmu.l\t@r15+,r14")  
 ;; ------------------------------------------------------------------------
 ;; Define the real conditional branch instructions.
 ;; ------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH SH2A]: Add movml instruction
  2010-08-13  6:46   ` Naveen H. S
@ 2010-08-14  8:06     ` Kaz Kojima
  2010-08-20  8:09       ` Naveen H. S
  0 siblings, 1 reply; 10+ messages in thread
From: Kaz Kojima @ 2010-08-14  8:06 UTC (permalink / raw)
  To: Naveen.S; +Cc: gcc-patches, Prafulla.Thakare

"Naveen H. S" <Naveen.S@kpitcummins.com> wrote:
> Peepholes are implemented to generate these multiple register transfer
> patterns. However, push required register transfers from rn to r0 and 
> pop required it to be from register r0 to rn. Hence, register push and 
> pop sequence were modified in prologue and epilogue to meet these 
> requirements. Please let me know whether the reversed sequence of push
> and pop of registers would have any adverse effect on the toolchain.

I've got many failures for c++/java test on sh4-linux
with your patch.

FAIL: g++.dg/compat/eh/ctor1 cp_compat_x_tst.o-cp_compat_y_tst.o execute
...
FAIL: g++.dg/cpp0x/noexcept03.C execution test
FAIL: g++.dg/cpp0x/noexcept07.C execution test
FAIL: g++.dg/cpp0x/nullptr21.C output pattern test, is , should match Test 1 OKTest 2 OK
FAIL: g++.dg/cpp0x/variadic73.C execution test
FAIL: g++.dg/eh/alias1.C execution test
FAIL: g++.dg/eh/cond1.C execution test
FAIL: g++.dg/eh/crossjump1.C execution test
FAIL: g++.dg/eh/ctor1.C execution test
FAIL: g++.dg/eh/ctor2.C execution test
FAIL: g++.dg/eh/delayslot1.C execution test
FAIL: g++.dg/eh/dtor1.C execution test
FAIL: g++.dg/eh/elide1.C execution test
FAIL: g++.dg/eh/elide2.C execution test
FAIL: g++.dg/eh/forced1.C execution test
...
                === g++ Summary ===

# of expected passes            23398
# of unexpected failures        150
...

and similar failures are seen for sh-elf/-m2a.  Please do c++
test at least next time.
It seems that the change to push PR first/pop PR last causes them.
Also there is a comment

> -  /* Push PR last; this gives better latencies after the prologue, and
> -     candidates for the return delay slot when there are no general
> -     registers pushed.  */

at the top of push_regs.  This would be a far more important
optimization compared with using movmu for a very rare
situation.

> The peepholes generated movml and movmu instruction as expected for 
> sh2a target. Regressions were performed on all sh2a variants for sh-elf
> toolchain and there was one failure in "gcc.dg/attr-isr.c" which checks
> for push pattern. As movml instruction is generated instead of 
> individual push patterns, it FAIL's. It seems to be a expected failure.

Then gcc.dg/attr-isr.c should be changed so that it is skipped for sh2a.

> @@ -6369,13 +6369,15 @@ pop (int rn)
>  static void
>  push_regs (HARD_REG_SET *mask, int interrupt_handler)
>  {
> -  int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
> +  int i = FIRST_PSEUDO_REGISTER;

It looks that the new initial value of i should be
FIRST_PSEUDO_REGISTER - 1, not FIRST_PSEUDO_REGISTER.

> @@ -7330,35 +7328,24 @@ sh_expand_epilogue (bool sibcall_p)
>      }
>    else /* ! TARGET_SH5 */
>      {
> -      int last_reg;
> +      int last_reg, interrupt_handler;
>  
> +      interrupt_handler = current_function_interrupt;
[snip]
> +      i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;

A new variable interrupt_handler is introduced for only
one time use.  Instead of that new variable,

      if (current_function_interrupt)
	{
	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
	      pop (i);
	}
      else
        i = 0;

looks to be enough, doesn't it?

> @@ -6790,7 +6790,152 @@ label:
>    "TARGET_SH1"
>    [(set (match_dup 0) (match_dup 1))]
>    "")
> -\f
> +

Leave this ^L here.

> +;; peephole patterns to generate multiple register transfer instructions
> +;; (movml and movmu) for sh2a target.
> +
> +(define_peephole

Use of define_peephole is deprecated.  Please use define_peephole2
instead.

> +  "movmu.l\t@r15+,r14")  
>  ;; ------------------------------------------------------------------------
>  ;; Define the real conditional branch instructions.
>  ;; ------------------------------------------------------------------------

Add a ^L just before the above comment.

>	* config/sh/sh.c (push_regs): Modify the sequence of register push
>	to suit movml and movmu instruction requirements.

Maybe

	* config/sh/sh.c (push_regs): Modify the order of registers.

?  We don't describe why, in the ChangeLog.

>	* config/sh/sh.md: New peephole patterns to generate movml and movmu
>	instructions.

Perhaps the usual way is something like

	* config/sh/sh.md (peephole2 for movml): New.

where I assume that things for movmu will be removed.

Regards,
	kaz

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH SH2A]: Add movml instruction
  2010-08-14  8:06     ` Kaz Kojima
@ 2010-08-20  8:09       ` Naveen H. S
  2010-08-20 13:53         ` Kaz Kojima
  0 siblings, 1 reply; 10+ messages in thread
From: Naveen H. S @ 2010-08-20  8:09 UTC (permalink / raw)
  To: Kaz Kojima; +Cc: gcc-patches, Prafulla Thakare

[-- Attachment #1: Type: text/plain, Size: 2515 bytes --]

Hi Kaz-san,

Thanks for the comments. 

>> I've got many failures for c++/java test on sh4-linux
>> with your patch.
>> and similar failures are seen for sh-elf/-m2a.  Please do c++
>> test at least next time.

Sorry for missing the C++ tests this time. I also observed the
failures with the C++ testsuite.

>> It seems that the change to push PR first/pop PR last causes them.

Yes. The change of push PR first/pop PR last resulted in these failures.

>> This would be a far more important optimization comparing with using 
>> movmu for a very rare situation.

Thanks for pointing out the issue. This optimization seems to give
more benefit than movmu.

>> Then gcc.dg/attr-isr.c should be changed so to skip it for sh2a.

Done. "gcc.dg/attr-isr.c" test is skipped for sh2a.

>> Use of define_peephole is deprecated.  Please use define_peephole2
>> instead.

The patterns for movml and movmu are generated at the O0 and O1
optimization levels. Higher optimization levels enable
"flag_schedule_insns_after_reload", which reschedules the instructions.
Hence, defining peephole2 would not help in generating these multiple
transfer instructions.

movml deals with push/pop of registers "r0-r7" which will be generated 
only with interrupt routines. Please find attached the patch 
"sh2a_movml.patch" which modifies prologue and epilogue patterns for 
interrupt routines to generate multiple transfer instructions. 

Modification of prologue and epilogue for interrupt routines was the
only solution I found to implement these multiple transfers. I have 
tried peepholes, instructions etc which did not give expected results.
Please let me know whether they can be implemented in any other way.

>> I assume that things for movmu will be removed.

The changes for movmu are removed, as push PR first/pop PR last leads
to many failures and also inhibits optimization. However, the movmu
pattern requires push PR first/pop PR last, and hence it was concluded
that the movmu instruction can't be implemented for sh2a.

Regression performed on sh2a and related targets.
No new regressions found.

ChangeLog
2010-08-20  Naveen H.S  <naveen.S@kpitcummins.com>

	* config/sh/sh.c (push_regs): Modify the order of registers for
	interrupt routines.
	(expand_epilogue): Likewise.
	* config/sh/sh.md (UNSPEC_HIGH_REG): New constant.
	(movml_push): New instruction.
	(movml_pop): New instruction.
	
	* testsuite/gcc.dg/attr-isr.c: Skip test for sh2a.
	
Thanks & Regards,
Naveen


[-- Attachment #2: sh2a_movml.patch --]
[-- Type: application/octet-stream, Size: 4339 bytes --]

--- orig/gcc/config/sh/sh.c	2010-07-15 20:01:28.000000000 +0530
+++ mod/gcc/config/sh/sh.c	2010-08-18 20:36:07.000000000 +0530
@@ -6370,7 +6370,7 @@ static void
 push_regs (HARD_REG_SET *mask, int interrupt_handler)
 {
   int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
-  int skip_fpscr = 0;
+  int skip_fpscr = 0, next_reg = 0;
 
   /* Push PR last; this gives better latencies after the prologue, and
      candidates for the return delay slot when there are no general
@@ -6407,9 +6407,27 @@ push_regs (HARD_REG_SET *mask, int inter
 
   /* Push banked registers last to improve delay slot opportunities.  */
   if (interrupt_handler)
-    for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+    for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
       if (TEST_HARD_REG_BIT (*mask, i))
-	push (i);
+	{
+	  if (TARGET_SH2A && !flag_schedule_insns_after_reload)
+	    {
+	      next_reg++;	
+	      if (i == FIRST_BANKED_REG)	
+		{	
+		  if (next_reg == 8)	
+		  emit_insn (gen_movml_push (GEN_INT (next_reg)));
+		  else 
+		    {
+		      for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+			if (TEST_HARD_REG_BIT (*mask, i))
+			  push (i);
+		    }
+	        }	
+	    }
+	  else
+	    push (i);
+	}
 
   /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
   if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
@@ -7330,7 +7348,7 @@ sh_expand_epilogue (bool sibcall_p)
     }
   else /* ! TARGET_SH5 */
     {
-      int last_reg;
+      int last_reg, next_reg = 0;
 
       save_size = 0;
 	/* For an ISR with RESBANK attribute assigned, don't pop PR
@@ -7347,10 +7365,27 @@ sh_expand_epilogue (bool sibcall_p)
 	 delay slot. RTE switches banks before the ds instruction.  */
       if (current_function_interrupt)
 	{
-	  for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
 	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
-	      pop (i);
-
+	      {
+		if (TARGET_SH2A && !flag_schedule_insns_after_reload)
+		  {
+		    next_reg++;
+		    if (i == LAST_BANKED_REG)
+		      {
+			if (next_reg == 8)
+			emit_insn (gen_movml_pop (GEN_INT (next_reg), GEN_INT (next_reg)));
+			else
+			  {
+			    for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+			      if (TEST_HARD_REG_BIT (live_regs_mask, i))
+				pop (i);
+			  }
+		      }
+		  }
+		else 
+		  pop (i);
+	      }
 	  last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
 	}
       else
--- orig/gcc/config/sh/sh.md	2010-08-20 11:08:56.000000000 +0530
+++ mod/gcc/config/sh/sh.md	2010-08-18 20:36:07.000000000 +0530
@@ -164,6 +164,7 @@
 
   ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
   (UNSPEC_PCREL_SYMOFF	46)
+  (UNSPEC_HIGH_REG	47)
 
   ;; These are used with unspec_volatile.
   (UNSPECV_BLOCKAGE	0)
@@ -6791,6 +6792,25 @@ label:
   [(set (match_dup 0) (match_dup 1))]
   "")
 
+;; Generate movml instruction for SH2A target; if the registers are pushed
+;; sequentially on to stack with R0 as starting register
+(define_insn "movml_push"
+  [(unspec [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_HIGH_REG)]
+  "TARGET_SH2A"
+  "movml.l\tr%s0,@-r15"
+  [(set_attr "in_delay_slot" "no")]
+)
+
+;; Generate movml instruction for SH2A target; if the registers are popped
+;; sequentially from stack with R0 as final register
+(define_insn "movml_pop"
+  [(set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) 
+			  (match_operand:SI 0 "immediate_operand" "i")))
+   (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_HIGH_REG)]
+  "TARGET_SH2A"
+  "movml.l\t@r15+,r%s1"
+  [(set_attr "in_delay_slot" "no")]
+)
 ;; ------------------------------------------------------------------------
 ;; Define the real conditional branch instructions.
 ;; ------------------------------------------------------------------------
diff -upr gcc-4.6-20100717/gcc/testsuite/gcc.dg/attr-isr.c toolchain/sh/src/gcc-4.6-20100717/gcc/testsuite/gcc.dg/attr-isr.c
--- orig/gcc/testsuite/gcc.dg/attr-isr.c	2007-08-13 14:54:46.000000000 +0530
+++ mod/gcc/testsuite/gcc.dg/attr-isr.c	2010-08-18 20:37:41.000000000 +0530
@@ -1,4 +1,5 @@
 /* { dg-do compile { target { { sh-*-* sh[1234ble]*-*-* } && nonpic } } } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m2a*" } { "" } } */
 /* { dg-options "-O" } */
 extern void foo ();
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH SH2A]: Add movml instruction
  2010-08-20  8:09       ` Naveen H. S
@ 2010-08-20 13:53         ` Kaz Kojima
  2010-08-22  2:51           ` Kaz Kojima
  0 siblings, 1 reply; 10+ messages in thread
From: Kaz Kojima @ 2010-08-20 13:53 UTC (permalink / raw)
  To: Naveen.S; +Cc: gcc-patches, Prafulla.Thakare

"Naveen H. S" <Naveen.S@kpitcummins.com> wrote:
> Modification of prologue and epilogue for interrupt routines was the
> only solution I found to implement these multiple transfers. I have 
> tried peepholes, instructions etc which did not give expected results.
> Please let me know whether they can be implemented in any other way.

Although there are other ways to do it, if the purpose
is simply to generate movml for interrupt handlers when
possible, modifying the {pro,epi}logue would be best.

> @@ -7347,10 +7365,27 @@ sh_expand_epilogue (bool sibcall_p)
>  	 delay slot. RTE switches banks before the ds instruction.  */
>        if (current_function_interrupt)
>  	{
> -	  for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
> +	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
>  	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
> -	      pop (i);
> -
> +	      {
> +		if (TARGET_SH2A && !flag_schedule_insns_after_reload)

Why is !flag_schedule_insns_after_reload checked here?
Does the sched2 pass cause some problem with movml?

> +		  {
> +		    next_reg++;
> +		    if (i == LAST_BANKED_REG)
> +		      {
> +			if (next_reg == 8)
> +			emit_insn (gen_movml_pop (GEN_INT (next_reg), GEN_INT (next_reg)));
> +			else
> +			  {
> +			    for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
> +			      if (TEST_HARD_REG_BIT (live_regs_mask, i))
> +				pop (i);
> +			  }
> +		      }
> +		  }
> +		else 
> +		  pop (i);
> +	      }

Also now there is no need to modify the order of registers.
Perhaps it would be better to write something like:

	* config/sh/sh.c (push_regs): Emit movml for interrupt
	handler when possible.
	...

   if (interrupt_handler)
     {
       bool use_movml = false;

       if (TARGET_SH2A)
	 {
	   unsigned int count = 0;

	   for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
	     if (TEST_HARD_REG_BIT (*mask, i))
	       count++;

	   /* We can use movml insn when all banked registers are
	      pushed.  */
	   if (count == LAST_BANKED_REG - FIRST_BANKED_REG)
	     use_movml = true;
	 }

       if (use_movml)
	 emit_insn (gen_movml_push ()));
       else
	 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
	   if (TEST_HARD_REG_BIT (*mask, i))
	     push (i);
     }

> +  (UNSPEC_HIGH_REG	47)
...
> +(define_insn "movml_push"
> +  [(unspec [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_HIGH_REG)]
...
> +(define_insn "movml_pop"
> +  [(set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) 
> +			  (match_operand:SI 0 "immediate_operand" "i")))
> +   (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_HIGH_REG)]

I guess that some optimization passes can remove these insns
which have no accurate register/memory uses.
It looks like a unique const_int will be used as the operand for these insns,
right?  If so, it would be better to use normal rtls which describe
the semantic of those movml insns accurately, like as the peepholes
in your previous patch.

Regards,
	kaz

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH SH2A]: Add movml instruction
  2010-08-20 13:53         ` Kaz Kojima
@ 2010-08-22  2:51           ` Kaz Kojima
  2010-08-24  9:14             ` Naveen H. S
  0 siblings, 1 reply; 10+ messages in thread
From: Kaz Kojima @ 2010-08-22  2:51 UTC (permalink / raw)
  To: Naveen.S; +Cc: gcc-patches, Prafulla.Thakare

> If so, it would be better to use normal rtls which describe
> the semantic of those movml insns accurately, like as the peepholes
> in your previous patch.

Another minor problem is that your patch gives wrong dwarf2
information with -g.  Even if it might not be a big problem
for the interrupt handlers, the correct debug information is
better.  The attached patch will give correct dwarf2 codes
with -g, though it isn't tested except for gcc.dg/attr-isr.c.
Does it work for you?

Regards,
	kaz
--
	* config/sh/sh.c (push_regs): Emit movml for interrupt handler
	when possible.
	(sh_expand_epilogue): Likewise.
	* config/sh/sh.md (movml_push_banked): New insn.
	(movml_pop_banked): Likewise.

diff -up ORIG/trunk/gcc/config/sh/sh.c trunk/gcc/config/sh/sh.c
--- ORIG/trunk/gcc/config/sh/sh.c	2010-07-17 10:31:31.000000000 +0900
+++ trunk/gcc/config/sh/sh.c	2010-08-22 10:58:56.000000000 +0900
@@ -6407,9 +6407,50 @@ push_regs (HARD_REG_SET *mask, int inter
 
   /* Push banked registers last to improve delay slot opportunities.  */
   if (interrupt_handler)
-    for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
-      if (TEST_HARD_REG_BIT (*mask, i))
-	push (i);
+    {
+      bool use_movml = false;
+
+      if (TARGET_SH2A)
+	{
+	  unsigned int count = 0;
+
+	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	    if (TEST_HARD_REG_BIT (*mask, i))
+	      count++;
+	    else
+	      break;
+
+	  /* Use movml when all banked registers are pushed.  */
+	  if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+	    use_movml = true;
+	}
+
+      if (use_movml)
+	{
+	  rtx x, mem, reg, set;
+	  rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+	  /* We must avoid scheduling multiple store insn with another
+	     insns.  */
+	  emit_insn (gen_blockage ());
+	  x = gen_movml_push_banked (sp_reg);
+	  x = frame_insn (x);
+	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	    {
+	      mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
+	      reg = gen_rtx_REG (SImode, i);
+	      add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
+	    }
+
+	  set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
+	  add_reg_note (x, REG_CFA_ADJUST_CFA, set);
+	  emit_insn (gen_blockage ());
+	}
+      else
+	for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	  if (TEST_HARD_REG_BIT (*mask, i))
+	    push (i);
+    }
 
   /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
   if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
@@ -7347,9 +7388,37 @@ sh_expand_epilogue (bool sibcall_p)
 	 delay slot. RTE switches banks before the ds instruction.  */
       if (current_function_interrupt)
 	{
-	  for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
-	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
-	      pop (i);
+	  bool use_movml = false;
+
+	  if (TARGET_SH2A)
+	    {
+	      unsigned int count = 0;
+
+	      for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+		if (TEST_HARD_REG_BIT (live_regs_mask, i))
+		  count++;
+		else
+		  break;
+
+	      /* Use movml when all banked register are poped.  */
+	      if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+		use_movml = true;
+	    }
+
+	  if (use_movml)
+	    {
+	      rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+	      /* We must avoid scheduling multiple load insn with another
+		 insns.  */
+	      emit_insn (gen_blockage ());
+	      emit_insn (gen_movml_pop_banked (sp_reg));
+	      emit_insn (gen_blockage ());
+	    }
+	  else
+	    for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+	      if (TEST_HARD_REG_BIT (live_regs_mask, i))
+		pop (i);
 
 	  last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
 	}
diff -up ORIG/trunk/gcc/config/sh/sh.md trunk/gcc/config/sh/sh.md
--- ORIG/trunk/gcc/config/sh/sh.md	2010-07-13 23:39:56.000000000 +0900
+++ trunk/gcc/config/sh/sh.md	2010-08-22 09:59:38.000000000 +0900
@@ -9216,6 +9216,39 @@ mov.l\\t1f,r0\\n\\
   ""
   [(set_attr "length" "0")])
 \f
+;; Define movml instructions for SH2A target.  Currently they are
+;; used to push and pop all banked registers only.
+
+(define_insn "movml_push_banked"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	  (plus (match_dup 0) (const_int -32)))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 28))) (reg:SI R7_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 24))) (reg:SI R6_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 20))) (reg:SI R5_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 16))) (reg:SI R4_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 12))) (reg:SI R3_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 8))) (reg:SI R2_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 4))) (reg:SI R1_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 0))) (reg:SI R0_REG))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\tr7,@-r15"
+  [(set_attr "in_delay_slot" "no")])
+
+(define_insn "movml_pop_banked"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	  (plus (match_dup 0) (const_int 32)))
+   (set (reg:SI R0_REG) (mem:SI (plus:SI (match_dup 0) (const_int -32))))
+   (set (reg:SI R1_REG) (mem:SI (plus:SI (match_dup 0) (const_int -28))))
+   (set (reg:SI R2_REG) (mem:SI (plus:SI (match_dup 0) (const_int -24))))
+   (set (reg:SI R3_REG) (mem:SI (plus:SI (match_dup 0) (const_int -20))))
+   (set (reg:SI R4_REG) (mem:SI (plus:SI (match_dup 0) (const_int -16))))
+   (set (reg:SI R5_REG) (mem:SI (plus:SI (match_dup 0) (const_int -12))))
+   (set (reg:SI R6_REG) (mem:SI (plus:SI (match_dup 0) (const_int -8))))
+   (set (reg:SI R7_REG) (mem:SI (plus:SI (match_dup 0) (const_int -4))))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\t@r15+,r7"
+  [(set_attr "in_delay_slot" "no")])
+\f
 ;; ------------------------------------------------------------------------
 ;; Scc instructions
 ;; ------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH SH2A]: Add movml instruction
  2010-08-22  2:51           ` Kaz Kojima
@ 2010-08-24  9:14             ` Naveen H. S
  2010-08-24 23:05               ` Kaz Kojima
  0 siblings, 1 reply; 10+ messages in thread
From: Naveen H. S @ 2010-08-24  9:14 UTC (permalink / raw)
  To: Kaz Kojima; +Cc: gcc-patches, Prafulla Thakare

Hi Kaz-san,

>> The attached patch will give correct dwarf2 codes with -g, though it
>> isn't tested except for gcc.dg/attr-isr.c.

Thanks for the modified patch.

>> Does it work for you?

Yes, the modified patch works as expected for interrupt handlers.
The regression was performed for sh2a and related targets. There were
no extra regressions. 
The testcase "g++.dg/parse/stack1.C" in C++ testsuite PASS with the 
patch which FAIL with unpatched/fresh toolchain.

Thanks & Regards,
Naveen


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH SH2A]: Add movml instruction
  2010-08-24  9:14             ` Naveen H. S
@ 2010-08-24 23:05               ` Kaz Kojima
  2010-08-28  0:36                 ` Kaz Kojima
  0 siblings, 1 reply; 10+ messages in thread
From: Kaz Kojima @ 2010-08-24 23:05 UTC (permalink / raw)
  To: Naveen.S; +Cc: gcc-patches, Prafulla.Thakare

"Naveen H. S" <Naveen.S@kpitcummins.com> wrote:
> Yes, the modified patch works as expected for interrupt handlers.
> The regression was performed for sh2a and related targets. There were
> no extra regressions. 
> The testcase "g++.dg/parse/stack1.C" in C++ testsuite PASS with the 
> patch which FAIL with unpatched/fresh toolchain.

Thanks for testing.  I'll apply it together with the testsuite
patch when it bootstraps on sh-linux.

Regards,
	kaz

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH SH2A]: Add movml instruction
  2010-08-24 23:05               ` Kaz Kojima
@ 2010-08-28  0:36                 ` Kaz Kojima
  0 siblings, 0 replies; 10+ messages in thread
From: Kaz Kojima @ 2010-08-28  0:36 UTC (permalink / raw)
  To: Naveen.S; +Cc: gcc-patches, Prafulla.Thakare

> I'll apply it together with the testsuite
> patch when it bootstraps on sh-linux.

I've just committed the patch below as revision 163602.

Regards,
	kaz
--
[gcc]
2010-08-27  Naveen H.S  <naveen.S@kpitcummins.com>
	    Kaz Kojima  <kkojima@gcc.gnu.org>

	* config/sh/sh.c (push_regs): Emit movml for interrupt handler
	when possible.
	(sh_expand_epilogue): Likewise.
	* config/sh/sh.md (movml_push_banked): New insn.
	(movml_pop_banked): Likewise.

[testsuite]
2010-08-27  Naveen H.S  <naveen.S@kpitcummins.com>
	    Kaz Kojima  <kkojima@gcc.gnu.org>

	* gcc.dg/attr-isr.c: Skip test for -m2a.  Don't run on sh2a*-*-*.

diff -uprN ORIG/trunk/gcc/config/sh/sh.c trunk/gcc/config/sh/sh.c
--- ORIG/trunk/gcc/config/sh/sh.c	2010-07-17 10:31:31.000000000 +0900
+++ trunk/gcc/config/sh/sh.c	2010-08-24 20:01:37.000000000 +0900
@@ -6407,9 +6407,50 @@ push_regs (HARD_REG_SET *mask, int inter
 
   /* Push banked registers last to improve delay slot opportunities.  */
   if (interrupt_handler)
-    for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
-      if (TEST_HARD_REG_BIT (*mask, i))
-	push (i);
+    {
+      bool use_movml = false;
+
+      if (TARGET_SH2A)
+	{
+	  unsigned int count = 0;
+
+	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	    if (TEST_HARD_REG_BIT (*mask, i))
+	      count++;
+	    else
+	      break;
+
+	  /* Use movml when all banked registers are pushed.  */
+	  if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+	    use_movml = true;
+	}
+
+      if (use_movml)
+	{
+	  rtx x, mem, reg, set;
+	  rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+	  /* We must avoid scheduling multiple store insn with another
+	     insns.  */
+	  emit_insn (gen_blockage ());
+	  x = gen_movml_push_banked (sp_reg);
+	  x = frame_insn (x);
+	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	    {
+	      mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
+	      reg = gen_rtx_REG (SImode, i);
+	      add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
+	    }
+
+	  set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
+	  add_reg_note (x, REG_CFA_ADJUST_CFA, set);
+	  emit_insn (gen_blockage ());
+	}
+      else
+	for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	  if (TEST_HARD_REG_BIT (*mask, i))
+	    push (i);
+    }
 
   /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
   if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
@@ -7347,9 +7388,37 @@ sh_expand_epilogue (bool sibcall_p)
 	 delay slot. RTE switches banks before the ds instruction.  */
       if (current_function_interrupt)
 	{
-	  for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
-	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
-	      pop (i);
+	  bool use_movml = false;
+
+	  if (TARGET_SH2A)
+	    {
+	      unsigned int count = 0;
+
+	      for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+		if (TEST_HARD_REG_BIT (live_regs_mask, i))
+		  count++;
+		else
+		  break;
+
+	      /* Use movml when all banked register are poped.  */
+	      if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+		use_movml = true;
+	    }
+
+	  if (use_movml)
+	    {
+	      rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+	      /* We must avoid scheduling multiple load insn with another
+		 insns.  */
+	      emit_insn (gen_blockage ());
+	      emit_insn (gen_movml_pop_banked (sp_reg));
+	      emit_insn (gen_blockage ());
+	    }
+	  else
+	    for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+	      if (TEST_HARD_REG_BIT (live_regs_mask, i))
+		pop (i);
 
 	  last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
 	}
diff -uprN ORIG/trunk/gcc/config/sh/sh.md trunk/gcc/config/sh/sh.md
--- ORIG/trunk/gcc/config/sh/sh.md	2010-07-13 23:39:56.000000000 +0900
+++ trunk/gcc/config/sh/sh.md	2010-08-24 20:01:37.000000000 +0900
@@ -9216,6 +9216,39 @@ mov.l\\t1f,r0\\n\\
   ""
   [(set_attr "length" "0")])
 \f
+;; Define movml instructions for SH2A target.  Currently they are
+;; used to push and pop all banked registers only.
+
+(define_insn "movml_push_banked"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	  (plus (match_dup 0) (const_int -32)))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 28))) (reg:SI R7_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 24))) (reg:SI R6_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 20))) (reg:SI R5_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 16))) (reg:SI R4_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 12))) (reg:SI R3_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 8))) (reg:SI R2_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 4))) (reg:SI R1_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 0))) (reg:SI R0_REG))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\tr7,@-r15"
+  [(set_attr "in_delay_slot" "no")])
+
+(define_insn "movml_pop_banked"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	  (plus (match_dup 0) (const_int 32)))
+   (set (reg:SI R0_REG) (mem:SI (plus:SI (match_dup 0) (const_int -32))))
+   (set (reg:SI R1_REG) (mem:SI (plus:SI (match_dup 0) (const_int -28))))
+   (set (reg:SI R2_REG) (mem:SI (plus:SI (match_dup 0) (const_int -24))))
+   (set (reg:SI R3_REG) (mem:SI (plus:SI (match_dup 0) (const_int -20))))
+   (set (reg:SI R4_REG) (mem:SI (plus:SI (match_dup 0) (const_int -16))))
+   (set (reg:SI R5_REG) (mem:SI (plus:SI (match_dup 0) (const_int -12))))
+   (set (reg:SI R6_REG) (mem:SI (plus:SI (match_dup 0) (const_int -8))))
+   (set (reg:SI R7_REG) (mem:SI (plus:SI (match_dup 0) (const_int -4))))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\t@r15+,r7"
+  [(set_attr "in_delay_slot" "no")])
+\f
 ;; ------------------------------------------------------------------------
 ;; Scc instructions
 ;; ------------------------------------------------------------------------
diff -uprN ORIG/trunk/gcc/testsuite/gcc.dg/attr-isr.c trunk/gcc/testsuite/gcc.dg/attr-isr.c
--- ORIG/trunk/gcc/testsuite/gcc.dg/attr-isr.c	2007-08-15 09:26:27.000000000 +0900
+++ trunk/gcc/testsuite/gcc.dg/attr-isr.c	2010-08-24 20:01:37.000000000 +0900
@@ -1,4 +1,5 @@
-/* { dg-do compile { target { { sh-*-* sh[1234ble]*-*-* } && nonpic } } } */
+/* { dg-do compile { target { { { sh-*-* sh[1234ble]*-*-* } && { ! sh2a*-*-* } } && nonpic } } } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m2a*" } { "" } } */
 /* { dg-options "-O" } */
 extern void foo ();
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2010-08-27 23:32 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-07  7:44 [PATCH SH2A]: Add movml instruction Naveen H. S
2010-05-08  0:14 ` Kaz Kojima
2010-08-13  6:46   ` Naveen H. S
2010-08-14  8:06     ` Kaz Kojima
2010-08-20  8:09       ` Naveen H. S
2010-08-20 13:53         ` Kaz Kojima
2010-08-22  2:51           ` Kaz Kojima
2010-08-24  9:14             ` Naveen H. S
2010-08-24 23:05               ` Kaz Kojima
2010-08-28  0:36                 ` Kaz Kojima

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).