public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* ARM patch: Thumb2 reorg
@ 2010-04-28 10:03 Bernd Schmidt
  2010-04-28 14:57 ` Richard Earnshaw
  2010-05-10  8:50 ` Nick Clifton
  0 siblings, 2 replies; 7+ messages in thread
From: Bernd Schmidt @ 2010-04-28 10:03 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1410 bytes --]

This introduces a Thumb2-specific reorg pass.  While working on the IRA
patch I posted yesterday, I noticed that ifcvt was affected by register
allocation.  We have a peephole2 that converts arithmetic instructions
into their flag-clobbering versions, since those have smaller encodings.
 This happens a bit too early in the pipeline and can prevent further
optimizations.

Fixed by moving it to the reorg pass.  Here's a sample of changes:

.L7:                            .L7:
        ldrh    r1, [r2, r3]            ldrh    r1, [r2, r3]
        cmp     r1, r0                  cmp     r1, r0
        bne     .L6           |         itt     eq
        adds    r1, r5, r3    |         addeq   r1, r5, r3
        ldr     r4, [r1, #4]  |         ldreq   r4, [r1, #4]
.L6:                          <

or

        mov     r2, r1        |         and     r2, r1, r4
        mov     r3, r6        |         and     r3, r6, r5
        ands    r2, r2, r4    <
        ands    r3, r3, r5    <


This patch has been regression tested on
arm-linux-gnueabi(qemu-system-armv7{arch=armv7-a/thumb,thumb,}); I've
also run SPEC2000 on Cortex-A9, where this patch together with the IRA
patch improves overall score a tiny bit, with a ~2.5% improvement on
gap.  Ok?

There are other things we could add to thumb2_reorg; e.g. creation of IT
insns - currently we seem to overestimate the size of all cond_execs on
Thumb2.


Bernd

[-- Attachment #2: t2-reorg.diff --]
[-- Type: text/plain, Size: 3751 bytes --]

	* config/arm/arm.c (thumb2_reorg): New function.
	(arm_reorg): Call it.
	* config/arm/thumb2.md (define_peephole2 for flag clobbering
	arithmetic operations): Delete.

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 158639)
+++ config/arm/arm.c	(working copy)
@@ -11470,6 +11470,61 @@ note_invalid_constants (rtx insn, HOST_W
   return result;
 }
 
+/* Convert instructions to their cc-clobbering variant if possible, since
+   that allows us to use smaller encodings.  */
+
+static void
+thumb2_reorg (void)
+{
+  basic_block bb;
+  regset_head live;
+
+  INIT_REG_SET (&live);
+
+  /* We are freeing block_for_insn in the toplev to keep compatibility
+     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
+  compute_bb_for_insn ();
+  df_analyze ();
+
+  FOR_EACH_BB (bb)
+    {
+      rtx insn;
+      COPY_REG_SET (&live, DF_LR_OUT (bb));
+      df_simulate_initialize_backwards (bb, &live);
+      FOR_BB_INSNS_REVERSE (bb, insn)
+	{
+	  if (NONJUMP_INSN_P (insn)
+	      && !REGNO_REG_SET_P (&live, CC_REGNUM))
+	    {
+	      rtx pat = PATTERN (insn);
+	      if (GET_CODE (pat) == SET
+		  && low_register_operand (XEXP (pat, 0), SImode)
+		  && thumb_16bit_operator (XEXP (pat, 1), SImode)
+		  && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
+		  && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
+		{
+		  rtx dst = XEXP (pat, 0);
+		  rtx src = XEXP (pat, 1);
+		  rtx op0 = XEXP (src, 0);
+		  rtx op1 = XEXP (src, 1);
+		  if (rtx_equal_p (dst, op0)
+		      || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+		    {
+		      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+		      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+		      rtx vec = gen_rtvec (2, pat, clobber);
+		      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+		      INSN_CODE (insn) = -1;
+		    }
+		}
+	    }
+	  if (NONDEBUG_INSN_P (insn))
+	    df_simulate_one_insn_backwards (bb, insn, &live);
+	}
+    }
+  CLEAR_REG_SET (&live);
+}
+
 /* Gcc puts the pool in the wrong place for ARM, since we can only
    load addresses a limited distance around the pc.  We do some
    special munging to move the constant pool values to the correct
@@ -11481,6 +11536,9 @@ arm_reorg (void)
   HOST_WIDE_INT address = 0;
   Mfix * fix;
 
+  if (TARGET_THUMB2)
+    thumb2_reorg ();
+  
   minipool_fix_head = minipool_fix_tail = NULL;
 
   /* The first insn must always be a note, or the code below won't
Index: config/arm/thumb2.md
===================================================================
--- config/arm/thumb2.md	(revision 158639)
+++ config/arm/thumb2.md	(working copy)
@@ -1069,29 +1069,6 @@ (define_insn_and_split "thumb2_eh_return
   }"
 )
 
-;; Peepholes and insns for 16-bit flag clobbering instructions.
-;; The conditional forms of these instructions do not clobber CC.
-;; However by the time peepholes are run it is probably too late to do
-;; anything useful with this information.
-(define_peephole2
-  [(set (match_operand:SI          0 "low_register_operand" "")
-        (match_operator:SI 3 "thumb_16bit_operator"
-	 [(match_operand:SI 1  "low_register_operand" "")
-	  (match_operand:SI 2 "low_register_operand" "")]))]
-  "TARGET_THUMB2
-   && (rtx_equal_p(operands[0], operands[1])
-       || GET_CODE(operands[3]) == PLUS
-       || GET_CODE(operands[3]) == MINUS)
-   && peep2_regno_dead_p(0, CC_REGNUM)"
-  [(parallel
-    [(set (match_dup 0)
-	  (match_op_dup 3
-	   [(match_dup 1)
-	    (match_dup 2)]))
-     (clobber (reg:CC CC_REGNUM))])]
-  ""
-)
-
 (define_insn "*thumb2_alusi3_short"
   [(set (match_operand:SI          0 "s_register_operand" "=l")
         (match_operator:SI 3 "thumb_16bit_operator"

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: ARM patch: Thumb2 reorg
  2010-04-28 10:03 ARM patch: Thumb2 reorg Bernd Schmidt
@ 2010-04-28 14:57 ` Richard Earnshaw
  2010-04-28 15:04   ` Bernd Schmidt
  2010-05-10  8:50 ` Nick Clifton
  1 sibling, 1 reply; 7+ messages in thread
From: Richard Earnshaw @ 2010-04-28 14:57 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: GCC Patches


On Wed, 2010-04-28 at 11:35 +0200, Bernd Schmidt wrote:
> This introduces a Thumb2-specific reorg pass.  While working on the IRA
> patch I posted yesterday, I noticed that ifcvt was affected by register
> allocation.  We have a peephole2 that converts arithmetic instructions
> into their flag-clobbering versions, since those have smaller encodings.
>  This happens a bit too early in the pipeline and can prevent further
> optimizations.
> 
> Fixed by moving it to the reorg pass.  Here's a sample of changes:
> 
> .L7:                            .L7:
>         ldrh    r1, [r2, r3]            ldrh    r1, [r2, r3]
>         cmp     r1, r0                  cmp     r1, r0
>         bne     .L6           |         itt     eq
>         adds    r1, r5, r3    |         addeq   r1, r5, r3
>         ldr     r4, [r1, #4]  |         ldreq   r4, [r1, #4]
> .L6:                          <
> 
> or
> 
>         mov     r2, r1        |         and     r2, r1, r4
>         mov     r3, r6        |         and     r3, r6, r5
>         ands    r2, r2, r4    <
>         ands    r3, r3, r5    <
> 
> 
> This patch has been regression tested on
> arm-linux-gnueabi(qemu-system-armv7{arch=armv7-a/thumb,thumb,}); I've
> also run SPEC2000 on Cortex-A9, where this patch together with the IRA
> patch improves overall score a tiny bit, with a ~2.5% improvement on
> gap.  Ok?
> 
> There are other things we could add to thumb2_reorg; e.g. creation of IT
> insns - currently we seem to overestimate the size of all cond_execs on
> Thumb2.
> 
> 
> Bernd

Hmm, neat.

However, we need to take the costs/benefits into account.  If the branch
is likely to be highly predictably taken, then this change may not be
beneficial (particularly on Coretex-A9).

A bit like combine, this patch should compare the costs of the
transformed code with the costs of the existing code and only do the
transformation if the results look better.

R.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: ARM patch: Thumb2 reorg
  2010-04-28 14:57 ` Richard Earnshaw
@ 2010-04-28 15:04   ` Bernd Schmidt
  0 siblings, 0 replies; 7+ messages in thread
From: Bernd Schmidt @ 2010-04-28 15:04 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: GCC Patches

On 04/28/2010 04:34 PM, Richard Earnshaw wrote:
> However, we need to take the costs/benefits into account.  If the branch
> is likely to be highly predictably taken, then this change may not be
> beneficial (particularly on Coretex-A9).

This change by itself does not involve branches.  It only moves the
conversion of insns to their cc-clobbering forms to the end of the
optimizer pipeline.  This allows passes like if-conversion to do their
job in more cases, for which I gave an example.

Tuning costs for if-conversion might be a worthwhile goal, but it's
unrelated to this patch.


Bernd

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: ARM patch: Thumb2 reorg
  2010-04-28 10:03 ARM patch: Thumb2 reorg Bernd Schmidt
  2010-04-28 14:57 ` Richard Earnshaw
@ 2010-05-10  8:50 ` Nick Clifton
  2010-06-09  9:51   ` Bernd Schmidt
  1 sibling, 1 reply; 7+ messages in thread
From: Nick Clifton @ 2010-05-10  8:50 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: GCC Patches

Hi Bernd,

> This introduces a Thumb2-specific reorg pass.
>
> 	* config/arm/arm.c (thumb2_reorg): New function.
> 	(arm_reorg): Call it.
> 	* config/arm/thumb2.md (define_peephole2 for flag clobbering
> 	arithmetic operations): Delete.

Approved - please apply.

Note - for the record I have had an offline conversation with Bernd 
about this patch and I now agree that, despite the possibility pointed 
out by Richard that it could introduce performance regressions in 
certain cases, it is still worthwhile overall.

Cheers
   Nick

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: ARM patch: Thumb2 reorg
  2010-05-10  8:50 ` Nick Clifton
@ 2010-06-09  9:51   ` Bernd Schmidt
  2010-06-12 15:53     ` Bootstrap broken on arm-linux was: " Laurent GUERBY
  0 siblings, 1 reply; 7+ messages in thread
From: Bernd Schmidt @ 2010-06-09  9:51 UTC (permalink / raw)
  To: Nick Clifton; +Cc: GCC Patches, Richard Earnshaw

On 05/10/2010 10:49 AM, Nick Clifton wrote:
> Note - for the record I have had an offline conversation with Bernd
> about this patch and I now agree that, despite the possibility pointed
> out by Richard that it could introduce performance regressions in
> certain cases, it is still worthwhile overall.

Just for the record, Richard also seemed ok with it when I spoke to him
a few weeks ago.  Retested & committed now.


Bernd

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Bootstrap broken on arm-linux was: ARM patch: Thumb2 reorg
  2010-06-09  9:51   ` Bernd Schmidt
@ 2010-06-12 15:53     ` Laurent GUERBY
  2010-06-13 12:37       ` Bernd Schmidt
  0 siblings, 1 reply; 7+ messages in thread
From: Laurent GUERBY @ 2010-06-12 15:53 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Nick Clifton, GCC Patches, Richard Earnshaw

On Wed, 2010-06-09 at 11:47 +0200, Bernd Schmidt wrote:
> On 05/10/2010 10:49 AM, Nick Clifton wrote:
> > Note - for the record I have had an offline conversation with Bernd
> > about this patch and I now agree that, despite the possibility pointed
> > out by Richard that it could introduce performance regressions in
> > certain cases, it is still worthwhile overall.
> 
> Just for the record, Richard also seemed ok with it when I spoke to him
> a few weeks ago.  Retested & committed now.

Hi,

This likely broke native bootstrap on ARM, see last comment of:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44458

/home/guerby/build/./prev-gcc/xgcc -B/home/guerby/build/./prev-gcc/
-B/n/57/guerby/install-trunk-160646/armv5tel-unknown-linux-gnueabi/bin/
-B/n/57/guerby/install-trunk-160646/armv5tel-unknown-linux-gnueabi/bin/
-B/n/57/guerby/install-t\
runk-160646/armv5tel-unknown-linux-gnueabi/lib/ -isystem
/n/57/guerby/install-trunk-160646/armv5tel-unknown-linux-gnueabi/include
-isystem
/n/57/guerby/install-trunk-160646/armv5tel-unknown-linux-gnueabi/sys-include   
-c   -g -O2 -gtog\
gle -DIN_GCC   -W -Wall -Wwrite-strings -Wcast-qual -Wstrict-prototypes
-Wmissing-prototypes -Wmissing-format-attribute -pedantic -Wno-long-long
-Wno-variadic-macros -Wno-overlength-strings -Werror -Wold-style-definition
-Wc++-compat -f\
no-common  -DHAVE_CONFIG_H -I. -I. -I../../trunk/gcc -I../../trunk/gcc/.
-I../../trunk/gcc/../include -I../../trunk/gcc/../libcpp/include 
-I../../trunk/gcc/../libdecnumber -I../../trunk/gcc/../libdecnumber/dpd
-I../libdecnumber     \
                ../../trunk/gcc/config/arm/arm.c -o arm.o
../../trunk/gcc/config/arm/arm.c: In function 'thumb2_reorg':
../../trunk/gcc/config/arm/arm.c:11466:19: error: initialization from
incompatible pointer type [-Werror]
../../trunk/gcc/config/arm/arm.c:11467:9: error: passing argument 3 of
'gen_rtx_fmt_E_stat' from incompatible pointer type [-Werror]
./genrtl.h:60:1: note: expected 'rtvec' but argument is of type 'rtx'
../../trunk/gcc/config/arm/arm.c:11460:9: error: unused variable 'op1'
[-Werror=unused-variable]
cc1: all warnings being treated as errors

make[3]: *** [arm.o] Error 1
make[3]: Leaving directory `/home/guerby/build/gcc'
make[2]: *** [all-stage2-gcc] Error 2
make[2]: Leaving directory `/home/guerby/build'
make[1]: *** [stage2-bubble] Error 2
make[1]: Leaving directory `/home/guerby/build'
make: *** [bootstrap] Error 2

Laurent



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Bootstrap broken on arm-linux was: ARM patch: Thumb2 reorg
  2010-06-12 15:53     ` Bootstrap broken on arm-linux was: " Laurent GUERBY
@ 2010-06-13 12:37       ` Bernd Schmidt
  0 siblings, 0 replies; 7+ messages in thread
From: Bernd Schmidt @ 2010-06-13 12:37 UTC (permalink / raw)
  To: Laurent GUERBY; +Cc: Nick Clifton, GCC Patches, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 238 bytes --]

On 06/12/2010 02:45 PM, Laurent GUERBY wrote:

> This likely broke native bootstrap on ARM, see last comment of:
> 
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44458

Sorry about that.  Hopefully fixed now with the patch below.


Bernd

[-- Attachment #2: t2-fix.diff --]
[-- Type: text/plain, Size: 1105 bytes --]

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 160663)
+++ ChangeLog	(working copy)
@@ -1,3 +1,7 @@
+2010-06-12  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* config/arm/arm.c (thumb2_reorg): Fix errors in previous change.
+
 2010-06-12  Jan Hubicka  <jh@suse.cz>
 
 	* df-core.c (df_clear_bb_info): New function.
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 160663)
+++ config/arm/arm.c	(working copy)
@@ -11457,13 +11457,12 @@
 		  rtx dst = XEXP (pat, 0);
 		  rtx src = XEXP (pat, 1);
 		  rtx op0 = XEXP (src, 0);
-		  rtx op1 = XEXP (src, 1);
 		  if (rtx_equal_p (dst, op0)
 		      || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
 		    {
 		      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
 		      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
-		      rtx vec = gen_rtvec (2, pat, clobber);
+		      rtvec vec = gen_rtvec (2, pat, clobber);
 		      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
 		      INSN_CODE (insn) = -1;
 		    }

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-06-12 23:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-28 10:03 ARM patch: Thumb2 reorg Bernd Schmidt
2010-04-28 14:57 ` Richard Earnshaw
2010-04-28 15:04   ` Bernd Schmidt
2010-05-10  8:50 ` Nick Clifton
2010-06-09  9:51   ` Bernd Schmidt
2010-06-12 15:53     ` Bootstrap broken on arm-linux was: " Laurent GUERBY
2010-06-13 12:37       ` Bernd Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).