public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, PR43920] Improve code-size optimizations
@ 2011-03-31 18:27 Tom de Vries
  2011-03-31 18:29 ` [PATCH, PR43920, 1/9] ARM specific part Tom de Vries
                   ` (4 more replies)
  0 siblings, 5 replies; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: Maxim Kuvyrkov, Bernd Schmidt

This patch set fixes PR 43920 - 'Choosing conditional execution over
conditional branches for code size in some cases'.

The patch set was tested on ARM and x86_64. The codesize changes were
benchmarked for ARM Thumb-2. For SPEC2000 and EEMBC CoreMark, a
reduction of 1.1% in the geomean of the benchmark sizes was measured.
For pic code, the reduction was 1.0%.

Patches will be posted separately.

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 1/9] ARM specific part.
  2011-03-31 18:27 [PATCH, PR43920] Improve code-size optimizations Tom de Vries
@ 2011-03-31 18:29 ` Tom de Vries
  2011-04-01 14:46   ` Tom de Vries
  2011-03-31 18:31 ` [PATCH, PR43920, 2/9] ARM specific part - test case Tom de Vries
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:29 UTC (permalink / raw)
  To: gcc-patches, richard.earnshaw; +Cc: Maxim Kuvyrkov, Bernd Schmidt

[-- Attachment #1: Type: text/plain, Size: 1986 bytes --]

arm-size-branch_cost.patch reduces BRANCH_COST for thumb2 -Os to 1. The
lower branch cost makes expand choose branches to expand code like '(a
== b || c == d)'.

The impact of arm-size-branch_cost.patch on the example from the bug
report for ARM Thumb-2 -Os is a size reduction of 15%, from 68 to 58 bytes.

This size reduction is illustrated in this diff of the assembly
(left, without patch, size 68. right, with patch, size 58):
...
push    {r3, r4, r5, r6, r7,    push    {r3, r4, r5, r6, r7,
mov     r7, r1                  mov     r7, r1
mov     r6, r2                  mov     r6, r2
movs    r1, #0                  movs    r1, #0
movs    r2, #1                  movs    r2, #1
mov     r5, r0                  mov     r5, r0
bl      lseek                   bl      lseek
movs    r2, #2                <
movs    r1, #0                  movs    r1, #0
                              > movs    r2, #2
mov     r4, r0                  mov     r4, r0
mov     r0, r5                  mov     r0, r5
bl      lseek                   bl      lseek
sub     r2, r4, #-1           | adds    r2, r4, #1
rsbs    r3, r2, #0            | beq     .L3
adc     r3, r3, r2            | adds    r3, r0, #1
cmp     r0, #-1               | beq     .L2
it      eq                    <
orreq   r3, r3, #1            <
cbnz    r3, .L3               <
subs    r0, r0, r4              subs    r0, r0, r4
beq     .L4                   | beq     .L5
str     r4, [r7, #0]            str     r4, [r7, #0]
str     r0, [r6, #0]            str     r0, [r6, #0]
mov     r0, r3                | movs    r0, #0
pop     {r3, r4, r5, r6, r7,    pop     {r3, r4, r5, r6, r7,
.L3:                            .L3:
mov     r0, #-1               | mov     r0, r4
pop     {r3, r4, r5, r6, r7,    pop     {r3, r4, r5, r6, r7,
.L4:                          | .L5:
mov     r0, #-1                 mov     r0, #-1
                              > .L2:
pop     {r3, r4, r5, r6, r7,    pop     {r3, r4, r5, r6, r7,
...

Thanks,
- Tom

[-- Attachment #2: 1_arm-size-branch_cost.patch --]
[-- Type: text/x-patch, Size: 622 bytes --]

Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h	(revision 293961)
+++ gcc/config/arm/arm.h	(revision 293962)
@@ -2201,7 +2201,8 @@ typedef struct
 /* Try to generate sequences that don't involve branches, we can then use
    conditional instructions */
 #define BRANCH_COST(speed_p, predictable_p) \
-  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
+  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
+		: (optimize > 0 ? 2 : 0))
 \f
 /* Position Independent Code.  */
 /* We decide which register to use based on the compilation options and

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-03-31 18:27 [PATCH, PR43920] Improve code-size optimizations Tom de Vries
  2011-03-31 18:29 ` [PATCH, PR43920, 1/9] ARM specific part Tom de Vries
@ 2011-03-31 18:31 ` Tom de Vries
  2011-04-01 14:47   ` Tom de Vries
  2011-03-31 18:35 ` [PATCH, PR43920, 3/9] Cleanup Tom de Vries
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:31 UTC (permalink / raw)
  To: gcc-patches, richard.earnshaw

[-- Attachment #1: Type: text/plain, Size: 70 bytes --]

A testcase for the code in arm-size-branch_cost.patch.

Thanks,
- Tom

[-- Attachment #2: 2_arm-size-branch_cost.test.patch --]
[-- Type: text/x-patch, Size: 659 bytes --]

Index: gcc/testsuite/gcc.target/arm/pr43920-1.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-1.c	(revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv7-a -mthumb -Os" } */
+
+int
+f (int start, int end, int *start_)
+{
+  if (start == -1 || end == -1)
+    return -1;
+
+  if (end - start)
+    return -1;
+
+  *start_ = start;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "\torr" 0 } } */
+/* { dg-final { scan-assembler-times "\tit\t" 0 } } */
+/* { dg-final { scan-assembler "\tbeq" } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 4-9/9] Cross-jumping.
  2011-03-31 18:27 [PATCH, PR43920] Improve code-size optimizations Tom de Vries
                   ` (2 preceding siblings ...)
  2011-03-31 18:35 ` [PATCH, PR43920, 3/9] Cleanup Tom de Vries
@ 2011-03-31 18:35 ` Tom de Vries
  2011-03-31 18:36   ` [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber Tom de Vries
                     ` (5 more replies)
  2011-03-31 21:16 ` [PATCH, PR43920] Improve code-size optimizations Eric Botcazou
  4 siblings, 6 replies; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:35 UTC (permalink / raw)
  To: gcc-patches, ebotcazou; +Cc: Maxim Kuvyrkov, Bernd Schmidt

The impact of patches 4-9 of the patch set on the example from the bug
report for ARM Thumb-2 -Os is a further size reduction of 7%, from 58 to
54 bytes.

This size reduction is illustrated in this diff of the assembly
(left, without patches, size 58. right, with patches, size 54):
...
push    {r3, r4, r5, r6, r7,    push    {r3, r4, r5, r6, r7,
mov     r7, r1                  mov     r7, r1
mov     r6, r2                  mov     r6, r2
movs    r1, #0                  movs    r1, #0
movs    r2, #1                  movs    r2, #1
mov     r5, r0                  mov     r5, r0
bl      lseek                   bl      lseek
movs    r1, #0                  movs    r1, #0
movs    r2, #2                  movs    r2, #2
mov     r4, r0                  mov     r4, r0
mov     r0, r5                  mov     r0, r5
bl      lseek                   bl      lseek
adds    r2, r4, #1              adds    r2, r4, #1
beq     .L3                  |  beq     .L5
adds    r3, r0, #1              adds    r3, r0, #1
beq     .L2                     beq     .L2
subs    r0, r0, r4              subs    r0, r0, r4
beq     .L5                     beq     .L5
str     r4, [r7, #0]            str     r4, [r7, #0]
str     r0, [r6, #0]            str     r0, [r6, #0]
movs    r0, #0                  movs    r0, #0
pop     {r3, r4, r5, r6, r7,    pop     {r3, r4, r5, r6, r7,
.L3:                         <
mov     r0, r4               <
pop     {r3, r4, r5, r6, r7, <
.L5:                            .L5:
mov     r0, #-1                 mov     r0, #-1
.L2:                            .L2:
pop     {r3, r4, r5, r6, r7,    pop     {r3, r4, r5, r6, r7,
...

Patches 4-9 allow crossjumping:
- to recognize that 'mov r0, r4' and 'mov r0, #-1' have the same
  effect, and
- to extend the search scope past label .L2 backward to .L5, in order
  to recognize that the jump to .L3 can be replaced with a jump to .L5.

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 3/9] Cleanup.
  2011-03-31 18:27 [PATCH, PR43920] Improve code-size optimizations Tom de Vries
  2011-03-31 18:29 ` [PATCH, PR43920, 1/9] ARM specific part Tom de Vries
  2011-03-31 18:31 ` [PATCH, PR43920, 2/9] ARM specific part - test case Tom de Vries
@ 2011-03-31 18:35 ` Tom de Vries
  2011-03-31 18:43   ` Jeff Law
  2011-04-01 14:48   ` Tom de Vries
  2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
  2011-03-31 21:16 ` [PATCH, PR43920] Improve code-size optimizations Eric Botcazou
  4 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:35 UTC (permalink / raw)
  To: gcc-patches, ebotcazou; +Cc: Maxim Kuvyrkov, Bernd Schmidt

[-- Attachment #1: Type: text/plain, Size: 36 bytes --]

Cleans up some code.

Thanks,
- Tom

[-- Attachment #2: 3_crossjump-cleanup-ml.patch --]
[-- Type: text/x-patch, Size: 1265 bytes --]

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -1961,7 +1961,6 @@
   edge e, e2, fallthru;
   bool changed;
   unsigned max, ix, ix2;
-  basic_block ev, ev2;
 
   /* Nothing to do if there is not at least two incoming edges.  */
   if (EDGE_COUNT (bb->preds) < 2)
@@ -2001,9 +2000,9 @@
   fallthru = find_fallthru_edge (bb->preds);
 
   changed = false;
-  for (ix = 0, ev = bb; ix < EDGE_COUNT (ev->preds); )
+  for (ix = 0; ix < EDGE_COUNT (bb->preds);)
     {
-      e = EDGE_PRED (ev, ix);
+      e = EDGE_PRED (bb, ix);
       ix++;
 
       /* As noted above, first try with the fallthru predecessor (or, a
@@ -2021,7 +2020,6 @@
 	    {
 	      changed = true;
 	      ix = 0;
-	      ev = bb;
 	      continue;
 	    }
 	}
@@ -2045,10 +2043,9 @@
       if (EDGE_SUCC (e->src, 0) != e)
 	continue;
 
-      for (ix2 = 0, ev2 = bb; ix2 < EDGE_COUNT (ev2->preds); )
+      for (ix2 = 0; ix2 < EDGE_COUNT (bb->preds); ix2++)
 	{
-	  e2 = EDGE_PRED (ev2, ix2);
-	  ix2++;
+	  e2 = EDGE_PRED (bb, ix2);
 
 	  if (e2 == e)
 	    continue;
@@ -2071,7 +2068,6 @@
 	  if (try_crossjump_to_edge (mode, e, e2))
 	    {
 	      changed = true;
-	      ev2 = bb;
 	      ix = 0;
 	      break;
 	    }

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber.
  2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
@ 2011-03-31 18:36   ` Tom de Vries
  2011-03-31 18:40     ` Jeff Law
  2011-04-01 14:48     ` Tom de Vries
  2011-03-31 18:42   ` [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register Tom de Vries
                     ` (4 subsequent siblings)
  5 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:36 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 146 bytes --]

Currently uses and clobbers are counted as insns in crossjumping, which
can cause undesirable crossjumping.  The patch fixes this.

Thanks,
- Tom

[-- Attachment #2: 4_crossjump-dont-count-use-clobber-ml.patch --]
[-- Type: text/x-patch, Size: 750 bytes --]

Index: gcc/cfgcleanup.c
===================================================================
--- gcc/cfgcleanup.c	(revision 170556)
+++ gcc/cfgcleanup.c	(working copy)
@@ -1074,6 +1074,7 @@ flow_find_cross_jump (basic_block bb1, b
 {
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
+  rtx p1;
 
   /* Skip simple jumps at the end of the blocks.  Complex jumps still
      need to be compared for equivalence, which we'll do below.  */
@@ -1122,7 +1123,9 @@ flow_find_cross_jump (basic_block bb1, b
 
 	  afterlast1 = last1, afterlast2 = last2;
 	  last1 = i1, last2 = i2;
-	  ninsns++;
+	  p1 = PATTERN (i1);
+	  if (!(GET_CODE (p1) == USE || GET_CODE (p1) == CLOBBER))
+            ninsns++;
 	}
 
       i1 = PREV_INSN (i1);

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber.
  2011-03-31 18:36   ` [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber Tom de Vries
@ 2011-03-31 18:40     ` Jeff Law
  2011-03-31 19:09       ` Tom de Vries
  2011-04-01 14:48     ` Tom de Vries
  1 sibling, 1 reply; 64+ messages in thread
From: Jeff Law @ 2011-03-31 18:40 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/31/11 12:35, Tom de Vries wrote:
> Currently uses and clobbers are counted as insns in crossjumping, which
> can cause undesirable crossjumping.  The patch fixes this.
OK.  Please install.

jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNlMmRAAoJEBRtltQi2kC7ywUH/ivnGe5uPvAimV15fE8GzqYa
4wIwiffn/8+Rc+0aallBeyFsz85oAVjb7WpgkHQK1JI1n/EBEutIAQgyyKU07OaR
GFaylhyll4UgMaAN6Z1Czoy4a02KD7DgqBqIlBDZScikQiLPrFB7Awm/3sX+g/cD
5u2PODqwNa2lVT4Ob7+a6tuovUC7QfZgojeQ3OK5BEzJRENA2OcGavRIQAdr8nio
NIG4NTOrN8YpL08o8xOW5HImDeA1uxvH1ejYt11S8z00DtlFgqMMoQBVcHJ1yfg7
nXUmKRpSTrQcuQ2fEbKIIE/bI56X/FuTJ2FnA4bq/llSyBmpbzNmmWXeVz0dW4M=
=oLgw
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register.
  2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
  2011-03-31 18:36   ` [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber Tom de Vries
@ 2011-03-31 18:42   ` Tom de Vries
  2011-03-31 18:52     ` Jeff Law
  2011-04-01 14:49     ` Tom de Vries
  2011-03-31 18:44   ` [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes Tom de Vries
                     ` (3 subsequent siblings)
  5 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:42 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 149 bytes --]

Inserts use of return register in epilogue threading, to keep
representation consistent, and prevent mismatch in crossjump matching.

Thanks,
- Tom


[-- Attachment #2: 5_crossjump-use-return-register-ml.patch --]
[-- Type: text/x-patch, Size: 1650 bytes --]

Index: gcc/function.c
===================================================================
--- gcc/function.c	(revision 170556)
+++ gcc/function.c	(working copy)
@@ -5241,6 +5241,19 @@ prologue_epilogue_contains (const_rtx in
   return 0;
 }
 
+/* Insert use of return register before the end of BB.  */
+
+static void
+emit_use_return_register_into_block (basic_block bb)
+{
+  rtx seq;
+  start_sequence ();
+  use_return_register ();
+  seq = get_insns ();
+  end_sequence ();
+  emit_insn_before (seq, BB_END (bb));
+}
+
 #ifdef HAVE_return
 /* Insert gen_return at the end of block BB.  This also means updating
    block_for_insn appropriately.  */
@@ -5395,6 +5408,15 @@ thread_prologue_and_epilogue_insns (void
 		 with a simple return instruction.  */
 	      if (simplejump_p (jump))
 		{
+		  /* The use of the return register might be present in the exit
+		     fallthru block.  Either:
+		     - removing the use is safe, and we should remove the use in
+		       the exit fallthru block, or
+		     - removing the use is not safe, and we should add it here.
+		     For now, we conservatively choose the latter.  Either of the
+		     2 helps in crossjumping.  */
+		  emit_use_return_register_into_block (bb);
+
 		  emit_return_into_block (bb);
 		  delete_insn (jump);
 		}
@@ -5409,6 +5431,9 @@ thread_prologue_and_epilogue_insns (void
 		      continue;
 		    }
 
+                  /* See comment in simplejump_p case above.  */
+		  emit_use_return_register_into_block (bb);
+
 		  /* If this block has only one successor, it both jumps
 		     and falls through to the fallthru block, so we can't
 		     delete the edge.  */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 3/9] Cleanup.
  2011-03-31 18:35 ` [PATCH, PR43920, 3/9] Cleanup Tom de Vries
@ 2011-03-31 18:43   ` Jeff Law
  2011-04-01 14:48   ` Tom de Vries
  1 sibling, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-03-31 18:43 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou, Maxim Kuvyrkov, Bernd Schmidt

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/31/11 12:29, Tom de Vries wrote:
> Cleans up some code.
OK.  Note that it would have helped if you mentioned that "bb" was a
loop invariant and thus "ev" and "ev2" were loop invariants that could
be replaced by "bb".

Please install..  Thanks,

jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNlMqtAAoJEBRtltQi2kC7AVYIAIWNzXn/51nk+xp4ZE+PS3T6
NU/y9m0q3x/hU+SpSzFX5l+hw/+mT+WQou9fU/Gzf9CQlWLk4i+6cXLbG60vGmFo
b38R4R7qnPpAI1OiHddaoh5eGzYHWkZQdgtYxLkJv6oqwF0EzSMg7/F7lXkaPsZK
agSSQBNFNTWFV3jD+YeplioAWphzbWeQf+d5glNuc5sP+ImUU/Mpg5Sh1WC3n1C3
i9XBwSczwzikHyvrmGJQcdcv3NL9MkazcMSbCD7jg32Zx/bXqdtlJ+qzkQGv1PAt
VQMzna8mqm3YDd4yOUo3xzJBA1SQM8Ff8xd1dVm4QL8WUPDBNZztobQ5f2U4Zi4=
=bDHO
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes.
  2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
  2011-03-31 18:36   ` [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber Tom de Vries
  2011-03-31 18:42   ` [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register Tom de Vries
@ 2011-03-31 18:44   ` Tom de Vries
  2011-03-31 18:56     ` Jeff Law
  2011-03-31 18:45   ` [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope Tom de Vries
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:44 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 157 bytes --]

Uses regnotes to analyze whether we can replace insn a by insn b, even
if we cannot replace insn b by insn a. Uses this info in crossjumping.

Thanks,
- Tom

[-- Attachment #2: 6_crossjump-regnotes-ml.patch --]
[-- Type: text/x-patch, Size: 9872 bytes --]

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -72,7 +72,7 @@
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
-static bool old_insns_match_p (int, rtx, rtx);
+static enum replace_direction old_insns_match_p (int, rtx, rtx);
 
 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
 static void merge_blocks_move_successor_nojumps (basic_block, basic_block);
@@ -950,27 +950,108 @@
 }
 
 
+/* Examine register notes on I1 and I2 and return:
+   - dir_forward if I1 can be replaced by I2, or
+   - dir_backward if I2 can be replaced by I1, or
+   - dir_both if both are the case.  */
+
+static enum replace_direction
+can_replace_by (rtx i1, rtx i2)
+{
+  rtx p1, p2, d1, d2, src1, src2, note1, note2;
+  bool c1, c2;
+
+  /* Check for 2 sets.  */
+  if (!INSN_P (i1) || !INSN_P (i2))
+    return dir_none;
+  p1 = PATTERN (i1);
+  p2 = PATTERN (i2);
+  if (GET_CODE (p1) != SET || GET_CODE (p2) != SET)
+    return dir_none;
+
+  /* Check that the 2 sets set the same dest.  */
+  d1 = SET_DEST (p1);
+  d2 = SET_DEST (p2);
+  if (!(reload_completed
+        ? rtx_renumbered_equal_p (d1, d2) : rtx_equal_p (d1, d2)))
+    return dir_none;
+
+  /* Find identical reg_equiv or reg_equal note, which implies that the 2 sets
+     set dest to the same value.  */
+  note1 = find_reg_equal_equiv_note (i1);
+  note2 = find_reg_equal_equiv_note (i2);
+  if (!note1 || !note2 || !rtx_equal_p (XEXP (note1, 0), XEXP (note2, 0))
+      || !CONST_INT_P (XEXP (note1, 0)))
+    return dir_none;
+
+  /* Although the 2 sets set dest to the same value, we cannot replace
+       (set (dest) (const_int))
+     by
+       (set (dest) (reg))
+     because we don't know if the reg is live and has the same value at the
+     location of replacement.  */
+  src1 = SET_SRC (p1);
+  src2 = SET_SRC (p2);
+  c1 = CONST_INT_P (src1);
+  c2 = CONST_INT_P (src2);
+  if (c1 && c2)
+    return dir_both;
+  else if (c2)
+    return dir_forward;
+  else if (c1)
+    return dir_backward;
+
+  return dir_none;
+}
+
+/* Merges directions A and B.  */
+
+static enum replace_direction
+merge_dir (enum replace_direction a, enum replace_direction b)
+{
+  /* Implements the following table:
+        |bo fw bw no
+     ---+-----------
+     bo |bo fw bw no
+     fw |-- fw no no
+     bw |-- -- bw no
+     no |-- -- -- no.  */
+
+  if (a == b)
+    return a;
+
+  if (a == dir_both)
+    return b;
+  if (b == dir_both)
+    return a;
+
+  return dir_none;
+}
+
-/* Return true if I1 and I2 are equivalent and thus can be crossjumped.  */
+/* Examine I1 and I2 and return:
+   - dir_forward if I1 can be replaced by I2, or
+   - dir_backward if I2 can be replaced by I1, or
+   - dir_both if both are the case.  */
 
-static bool
+static enum replace_direction
 old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx i1, rtx i2)
 {
   rtx p1, p2;
 
   /* Verify that I1 and I2 are equivalent.  */
   if (GET_CODE (i1) != GET_CODE (i2))
-    return false;
+    return dir_none;
 
   /* __builtin_unreachable() may lead to empty blocks (ending with
      NOTE_INSN_BASIC_BLOCK).  They may be crossjumped. */
   if (NOTE_INSN_BASIC_BLOCK_P (i1) && NOTE_INSN_BASIC_BLOCK_P (i2))
-    return true;
+    return dir_both;
 
   p1 = PATTERN (i1);
   p2 = PATTERN (i2);
 
   if (GET_CODE (p1) != GET_CODE (p2))
-    return false;
+    return dir_none;
 
   /* If this is a CALL_INSN, compare register usage information.
      If we don't check this on stack register machines, the two
@@ -991,15 +1072,15 @@
       rtx n2 = find_reg_note (i2, REG_EH_REGION, 0);
 
       if (!n1 && n2)
-	return false;
+	return dir_none;
 
       if (n1 && (!n2 || XEXP (n1, 0) != XEXP (n2, 0)))
-	return false;
+	return dir_none;
 
       if (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
 			CALL_INSN_FUNCTION_USAGE (i2))
 	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))
-	return false;
+	return dir_none;
     }
 
 #ifdef STACK_REGS
@@ -1028,15 +1109,15 @@
 	  SET_HARD_REG_BIT (i2_regset, REGNO (XEXP (note, 0)));
 
       if (!hard_reg_set_equal_p (i1_regset, i2_regset))
-	return false;
+	return dir_none;
     }
 #endif
 
   if (reload_completed
       ? rtx_renumbered_equal_p (p1, p2) : rtx_equal_p (p1, p2))
-    return true;
+    return dir_both;
 
-  return false;
+  return can_replace_by (i1, i2);
 }
 \f
 /* When comparing insns I1 and I2 in flow_find_cross_jump or
@@ -1063,18 +1144,32 @@
 }
 
 /* Look through the insns at the end of BB1 and BB2 and find the longest
-   sequence that are equivalent.  Store the first insns for that sequence
-   in *F1 and *F2 and return the sequence length.
+   sequence that are either equivalent, or allow forward or backward
+   replacement.  Store the first insns for that sequence in *F1 and *F2 and
+   return the sequence length.
+
+   DIR_P indicates the allowed replacement direction on function entry, and
+   the actual replacement direction on function exit.  If NULL, only equivalent
+   sequences are allowed.
 
    To simplify callers of this function, if the blocks match exactly,
    store the head of the blocks in *F1 and *F2.  */
 
 int
-flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx *f1, rtx *f2)
+flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx *f1, rtx *f2,
+                      enum replace_direction *dir_p)
 {
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
   rtx p1;
+  enum replace_direction dir, last_dir, afterlast_dir;
+
+  if (dir_p)
+    dir = *dir_p;
+  else
+    dir = dir_both;
+  afterlast_dir = dir;
+  last_dir = afterlast_dir;
 
   /* Skip simple jumps at the end of the blocks.  Complex jumps still
      need to be compared for equivalence, which we'll do below.  */
@@ -1111,7 +1206,8 @@
       if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
 	break;
 
-      if (!old_insns_match_p (0, i1, i2))
+      dir = merge_dir (dir, old_insns_match_p (0, i1, i2));
+      if (dir == dir_none || (!dir_p && dir != dir_both))
 	break;
 
       merge_memattrs (i1, i2);
@@ -1123,6 +1219,8 @@
 
 	  afterlast1 = last1, afterlast2 = last2;
 	  last1 = i1, last2 = i2;
+	  afterlast_dir = last_dir;
+	  last_dir = dir;
 	  p1 = PATTERN (i1);
 	  if (!(GET_CODE (p1) == USE || GET_CODE (p1) == CLOBBER))
             ninsns++;
@@ -1136,7 +1234,7 @@
   /* Don't allow the insn after a compare to be shared by
      cross-jumping unless the compare is also shared.  */
   if (ninsns && reg_mentioned_p (cc0_rtx, last1) && ! sets_cc0_p (last1))
-    last1 = afterlast1, last2 = afterlast2, ninsns--;
+    last1 = afterlast1, last2 = afterlast2, last_dir = afterlast_dir, ninsns--;
 #endif
 
   /* Include preceding notes and labels in the cross-jump.  One,
@@ -1162,7 +1260,9 @@
       *f2 = last2;
     }
 
+  if (dir_p)
+    *dir_p = last_dir;
   return ninsns;
 }
 
       /* Ignore notes.  */
@@ -1226,7 +1326,7 @@
 	      && nehedges1 != nehedges2))
 	break;
 
-      if (!old_insns_match_p (0, i1, i2))
+      if (old_insns_match_p (0, i1, i2) != dir_both)
 	break;
 
       merge_memattrs (i1, i2);
@@ -1455,7 +1555,8 @@
 		  rr.update_label_nuses = false;
 		  for_each_rtx (&BB_END (bb1), replace_label, &rr);
 
-		  match = old_insns_match_p (mode, BB_END (bb1), BB_END (bb2));
+		  match = (old_insns_match_p (mode, BB_END (bb1), BB_END (bb2))
+			   == dir_both);
 		  if (dump_file && match)
 		    fprintf (dump_file,
 			     "Tablejumps in bb %i and %i match.\n",
@@ -1477,7 +1578,7 @@
 
   /* First ensure that the instructions match.  There may be many outgoing
      edges so this test is generally cheaper.  */
-  if (!old_insns_match_p (mode, BB_END (bb1), BB_END (bb2)))
+  if (old_insns_match_p (mode, BB_END (bb1), BB_END (bb2)) != dir_both)
     return false;
 
   /* Search the outgoing edges, ensure that the counts do match, find possible
@@ -1578,6 +1679,7 @@
   int nmatch;
   basic_block src1 = e1->src, src2 = e2->src;
   basic_block redirect_to, redirect_from, to_remove;
+  enum replace_direction dir;
   rtx newpos1, newpos2;
   edge s;
   edge_iterator ei;
@@ -1633,7 +1735,8 @@
     return false;
 
   /* ... and part the second.  */
-  nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2);
+  dir = dir_forward;
+  nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2, &dir);
 
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched
--- gcc/ifcvt.c	(revision 170556)
+++ gcc/ifcvt.c	(working copy)
@@ -476,7 +476,8 @@ cond_exec_process_if_block (ce_if_block_
       /* Look for matching sequences at the head and tail of the two blocks,
 	 and limit the range of insns to be converted if possible.  */
       n_matching = flow_find_cross_jump (then_bb, else_bb,
-					 &then_first_tail, &else_first_tail);
+					 &then_first_tail, &else_first_tail,
+                                         NULL);
       if (then_first_tail == BB_HEAD (then_bb))
 	then_start = then_end = NULL_RTX;
       if (else_first_tail == BB_HEAD (else_bb))
--- gcc/basic-block.h	(revision 170556)
+++ gcc/basic-block.h	(working copy)
@@ -803,9 +803,12 @@ extern bool purge_dead_edges (basic_bloc
 extern void find_many_sub_basic_blocks (sbitmap);
 extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
 
+enum replace_direction { dir_none, dir_forward, dir_backward, dir_both };
+
 /* In cfgcleanup.c.  */
 extern bool cleanup_cfg (int);
-extern int flow_find_cross_jump (basic_block, basic_block, rtx *, rtx *);
+extern int flow_find_cross_jump (basic_block, basic_block, rtx *, rtx *,
+                                 enum replace_direction*);
 extern int flow_find_head_matching_sequence (basic_block, basic_block,
 					     rtx *, rtx *, int);
 

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope.
  2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
                     ` (2 preceding siblings ...)
  2011-03-31 18:44   ` [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes Tom de Vries
@ 2011-03-31 18:45   ` Tom de Vries
  2011-03-31 18:58     ` Jeff Law
  2011-04-01 14:55     ` Tom de Vries
  2011-03-31 18:46   ` [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case Tom de Vries
  2011-03-31 18:56   ` [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions Tom de Vries
  5 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:45 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 53 bytes --]

Allows crossjump over fallthru paths.

Thanks,
- Tom

[-- Attachment #2: 7_crossjump-fallthru-ml.patch --]
[-- Type: text/x-patch, Size: 6897 bytes --]

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -1139,6 +1139,43 @@
     }
 }
 
+ /* Walks from I1 in BB1 backward till the next non-debug insn, and returns the
+    resulting insn in I1, and the corresponding bb in BB1.  At the head of a
+    bb, if there is a predecessor bb that reaches this bb via fallthru, and
+    FOLLOW_FALLTHRU, walks further in the predecessor bb and registers this in
+    DID_FALLTHRU.  Otherwise, stops at the head of the bb.  */
+
+static void
+walk_to_nondebug_insn (rtx *i1, basic_block *bb1, bool follow_fallthru,
+                       bool *did_fallthru)
+{
+  edge fallthru;
+
+  *did_fallthru = false;
+
+  /* Ignore notes.  */
+  while (!NONDEBUG_INSN_P (*i1))
+    {
+      if (*i1 != BB_HEAD (*bb1))
+        {
+          *i1 = PREV_INSN (*i1);
+          continue;
+        }
+
+      if (!follow_fallthru)
+        return;
+
+      fallthru = find_fallthru_edge ((*bb1)->preds);
+      if (!fallthru || fallthru->src == ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
+          || !single_succ_p (fallthru->src))
+        return;
+
+      *bb1 = fallthru->src;
+      *i1 = BB_END (*bb1);
+      *did_fallthru = true;
+     }
+}
+
 /* Look through the insns at the end of BB1 and BB2 and find the longest
    sequence that are equivalent.  Store the first insns for that sequence
    in *F1 and *F2 and return the sequence length.
@@ -1153,6 +1190,7 @@
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
   enum replace_direction dir, last_dir, afterlast_dir;
+  bool follow_fallthru, did_fallthru;
 
   if (dir_p)
     dir = *dir_p;
@@ -1187,11 +1225,30 @@
   while (true)
     {
-      /* Ignore notes.  */
-      while (!NONDEBUG_INSN_P (i1) && i1 != BB_HEAD (bb1))
-	i1 = PREV_INSN (i1);
-
-      while (!NONDEBUG_INSN_P (i2) && i2 != BB_HEAD (bb2))
-	i2 = PREV_INSN (i2);
+      /* In the following example, we can replace all jumps to C by jumps to A.
+
+         This removes 4 duplicate insns.
+         [bb A] insn1            [bb C] insn1
+                insn2                   insn2
+         [bb B] insn3                   insn3
+                insn4                   insn4
+                jump_insn               jump_insn
+
+         We could also replace all jumps to A by jumps to C, but that leaves B
+         alive, and removes only 2 duplicate insns.  In a subsequent crossjump
+         step, all jumps to B would be replaced with jumps to the middle of C,
+         achieving the same result with more effort.
+         So we allow only the first possibility, which means that we don't allow
+         fallthru in the block that's being replaced.  */
+
+      follow_fallthru = dir_p && dir != dir_forward;
+      walk_to_nondebug_insn (&i1, &bb1, follow_fallthru, &did_fallthru);
+      if (did_fallthru)
+        dir = dir_backward;
+
+      follow_fallthru = dir_p && dir != dir_backward;
+      walk_to_nondebug_insn (&i2, &bb2, follow_fallthru, &did_fallthru);
+      if (did_fallthru)
+        dir = dir_forward;
 
       if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
 	break;
@@ -1230,12 +1287,14 @@
      Two, it keeps line number notes as matched as may be.  */
   if (ninsns)
     {
+      bb1 = BLOCK_FOR_INSN (last1);
       while (last1 != BB_HEAD (bb1) && !NONDEBUG_INSN_P (PREV_INSN (last1)))
 	last1 = PREV_INSN (last1);
 
       if (last1 != BB_HEAD (bb1) && LABEL_P (PREV_INSN (last1)))
 	last1 = PREV_INSN (last1);
 
+      bb2 = BLOCK_FOR_INSN (last2);
       while (last2 != BB_HEAD (bb2) && !NONDEBUG_INSN_P (PREV_INSN (last2)))
 	last2 = PREV_INSN (last2);
 
@@ -1659,6 +1718,7 @@
   int nmatch;
   basic_block src1 = e1->src, src2 = e2->src;
   basic_block redirect_to, redirect_from, to_remove;
+  basic_block osrc1, osrc2, redirect_edges_to, tmp;
   enum replace_direction dir;
   rtx newpos1, newpos2;
   edge s;
@@ -1720,8 +1780,15 @@
     return false;
 
   /* ... and part the second.  */
   dir = dir_forward;
   nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2, &dir);
+
+  osrc1 = src1;
+  osrc2 = src2;
+  if (newpos1 != NULL_RTX)
+    src1 = BLOCK_FOR_INSN (newpos1);
+  if (newpos2 != NULL_RTX)
+    src2 = BLOCK_FOR_INSN (newpos2);
 
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched
@@ -1745,8 +1812,8 @@
       rtx label1, label2;
       rtx table1, table2;
 
-      if (tablejump_p (BB_END (src1), &label1, &table1)
-	  && tablejump_p (BB_END (src2), &label2, &table2)
+      if (tablejump_p (BB_END (osrc1), &label1, &table1)
+	  && tablejump_p (BB_END (osrc2), &label2, &table2)
 	  && label1 != label2)
 	{
 	  replace_label_data rr;
@@ -1761,7 +1828,7 @@
 	      /* Do not replace the label in SRC1->END because when deleting
 		 a block whose end is a tablejump, the tablejump referenced
 		 from the instruction is deleted too.  */
-	      if (insn != BB_END (src1))
+	      if (insn != BB_END (osrc1))
 		for_each_rtx (&insn, replace_label, &rr);
 	    }
 	}
@@ -1802,8 +1869,13 @@
   /* We may have some registers visible through the block.  */
   df_set_bb_dirty (redirect_to);
 
+  if (osrc2 == src2)
+    redirect_edges_to = redirect_to;
+  else
+    redirect_edges_to = osrc2;
+
   /* Recompute the frequencies and counts of outgoing edges.  */
-  FOR_EACH_EDGE (s, ei, redirect_to->succs)
+  FOR_EACH_EDGE (s, ei, redirect_edges_to->succs)
     {
       edge s2;
       edge_iterator ei;
@@ -1846,24 +1918,32 @@
 	    s2->dest->count = 0;
 	}
 
-      if (!redirect_to->frequency && !src1->frequency)
+      if (!redirect_edges_to->frequency && !src1->frequency)
 	s->probability = (s->probability + s2->probability) / 2;
       else
 	s->probability
-	  = ((s->probability * redirect_to->frequency +
+	  = ((s->probability * redirect_edges_to->frequency +
 	      s2->probability * src1->frequency)
-	     / (redirect_to->frequency + src1->frequency));
+	     / (redirect_edges_to->frequency + src1->frequency));
     }
 
   /* Adjust count and frequency for the block.  An earlier jump
      threading pass may have left the profile in an inconsistent
      state (see update_bb_profile_for_threading) so we must be
      prepared for overflows.  */
-  redirect_to->count += src1->count;
-  redirect_to->frequency += src1->frequency;
-  if (redirect_to->frequency > BB_FREQ_MAX)
-    redirect_to->frequency = BB_FREQ_MAX;
-  update_br_prob_note (redirect_to);
+  tmp = redirect_to;
+  do
+    {
+      tmp->count += src1->count;
+      tmp->frequency += src1->frequency;
+      if (tmp->frequency > BB_FREQ_MAX)
+        tmp->frequency = BB_FREQ_MAX;
+      if (tmp == redirect_edges_to)
+        break;
+      tmp = find_fallthru_edge (tmp->succs)->dest;
+    }
+  while (true);
+  update_br_prob_note (redirect_edges_to);
 
   /* Edit SRC1 to go to REDIRECT_TO at NEWPOS1.  */
 

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case.
  2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
                     ` (3 preceding siblings ...)
  2011-03-31 18:45   ` [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope Tom de Vries
@ 2011-03-31 18:46   ` Tom de Vries
  2011-03-31 19:00     ` Jeff Law
  2011-04-01 14:56     ` Tom de Vries
  2011-03-31 18:56   ` [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions Tom de Vries
  5 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:46 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 58 bytes --]

Test-case for crossjump-fallthru-ml.patch.

Thanks,
- Tom

[-- Attachment #2: 8_crossjump-fallthru-ml.test.patch --]
[-- Type: text/x-patch, Size: 863 bytes --]

Index: gcc/testsuite/gcc.target/arm/pr43920-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv7-a -mthumb -Os" } */
+
+#include <stdio.h>
+
+int getFileStartAndLength (int fd, int *start_, size_t *length_)
+{
+      int start, end;
+      size_t length;
+
+      start = lseek (fd, 0L, SEEK_CUR);
+      end = lseek (fd, 0L, SEEK_END);
+
+      if (start == -1 || end == -1)
+         return -1;
+
+      length = end - start;
+      if (length == 0)
+         return -1;
+
+      *start_ = start;
+      *length_ = length;
+
+      return 0;
+}
+
+/* { dg-final { scan-assembler-times "pop" 2 } } */
+/* { dg-final { scan-assembler-times "beq" 3 } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register.
  2011-03-31 18:42   ` [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register Tom de Vries
@ 2011-03-31 18:52     ` Jeff Law
  2011-04-01 14:49     ` Tom de Vries
  1 sibling, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-03-31 18:52 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/31/11 12:40, Tom de Vries wrote:
> Inserts use of return register in epilogue threading, to keep
> representation consistent, and prevent mismatch in crossjump matching.
OK.
Jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNlMu/AAoJEBRtltQi2kC7lTwIAJeKk9DQjxzDqI4wk2skl6x3
a5AJ5QfxCG35V0djxabJgapTKuxg7FzLPHcXyHCA44/eOiHTOZwlP6wRDlkQtVpA
NszbRIkFyRXMI6r5h9G7uITKes1xUYFMvBFRVFjTyDZYoNZoxGtIPYw4+6CVGXg2
7jD1LWUwaJRwQVqqdC3JMu9GwsxPK81Fsl2zjCRvYcwXhCB2YjzjQiA5ZGIGmd5G
DQ5RPavum8QsP8pHPBo+oWB/1Lc1lKbE4Gbal4IPWVR+GuyY3JuTIOcfTljhbkWe
kOj0R++Yt/3UUcwT8i8ywRxIlsAYQgR6AZrbLaqQ3u85BgEjjotrCur7x76z7TA=
=RU+t
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions.
  2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
                     ` (4 preceding siblings ...)
  2011-03-31 18:46   ` [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case Tom de Vries
@ 2011-03-31 18:56   ` Tom de Vries
  2011-03-31 19:02     ` Jeff Law
  2011-04-01 14:56     ` Tom de Vries
  5 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 18:56 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 305 bytes --]

Allow crossjumping in both directions. Crossjump was assumed to be
symmetric, and therefore only applied on edges e1,e2 and not on e2,e1.
Now, given both the fallthru fix and the regnotes fix, crossjumping is
no longer symmetric, so we allow both directions (determined in a single
matching pass, rather than by testing each pair of edges twice).

Thanks,
- Tom

[-- Attachment #2: 9_crossjump-backward-ml.patch --]
[-- Type: text/x-patch, Size: 2710 bytes --]

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -69,7 +69,7 @@
    information; we should run df_analyze to enable more opportunities.  */
 static bool block_was_dirty;
 
-static bool try_crossjump_to_edge (int, edge, edge);
+static bool try_crossjump_to_edge (int, edge, edge, enum replace_direction);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
 static enum replace_direction old_insns_match_p (int, rtx, rtx);
@@ -1695,15 +1695,17 @@
 /* E1 and E2 are edges with the same destination block.  Search their
    predecessors for common code.  If found, redirect control flow from
-   (maybe the middle of) E1->SRC to (maybe the middle of) E2->SRC.  */
+   (maybe the middle of) E1->SRC to (maybe the middle of) E2->SRC (dir_forward),
+   or the other way around (dir_backward).  DIR specifies the allowed
+   replacement direction.  */
 
 static bool
-try_crossjump_to_edge (int mode, edge e1, edge e2)
+try_crossjump_to_edge (int mode, edge e1, edge e2,
+                       enum replace_direction dir)
 {
   int nmatch;
   basic_block src1 = e1->src, src2 = e2->src;
   basic_block redirect_to, redirect_from, to_remove;
   basic_block osrc1, osrc2, redirect_edges_to, tmp;
-  enum replace_direction dir;
   rtx newpos1, newpos2;
   edge s;
   edge_iterator ei;
@@ -1757,8 +1759,7 @@
     return false;
 
   /* ... and part the second.  */
-  dir = dir_forward;
   nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2, &dir);
 
   osrc1 = src1;
   osrc2 = src2;
@@ -1767,5 +1768,15 @@
   if (newpos2 != NULL_RTX)
     src2 = BLOCK_FOR_INSN (newpos2);
 
+  if (dir == dir_backward)
+    {
+#define SWAP(T, X, Y) do { T tmp = (X); (X) = (Y); (Y) = tmp; } while (0)
+      SWAP (basic_block, osrc1, osrc2);
+      SWAP (basic_block, src1, src2);
+      SWAP (edge, e1, e2);
+      SWAP (rtx, newpos1, newpos2);
+#undef SWAP
+    }
+
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched
@@ -2020,7 +2031,7 @@
 		   || (fallthru->src->flags & BB_MODIFIED)))
 	    continue;
 
-	  if (try_crossjump_to_edge (mode, e, fallthru))
+	  if (try_crossjump_to_edge (mode, e, fallthru, dir_forward))
 	    {
 	      changed = true;
 	      ix = 0;
@@ -2068,7 +2079,9 @@
 		   || (e2->src->flags & BB_MODIFIED)))
 	    continue;
 
-	  if (try_crossjump_to_edge (mode, e, e2))
+	  /* Neither e nor e2 is a fallthru edge, so we can crossjump in either
+	     direction.  */
+	  if (try_crossjump_to_edge (mode, e, e2, dir_both))
 	    {
 	      changed = true;
 	      ix = 0;

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes.
  2011-03-31 18:44   ` [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes Tom de Vries
@ 2011-03-31 18:56     ` Jeff Law
  2011-03-31 21:25       ` Tom de Vries
  0 siblings, 1 reply; 64+ messages in thread
From: Jeff Law @ 2011-03-31 18:56 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/31/11 12:42, Tom de Vries wrote:
> Uses regnotes to analyze whether we can replace insn a by insn b, even
> if we cannot replace insn b by insn a. Uses this info in crossjumping.
Shouldn't this be using single_set rather than digging through PATTERN,
then verifying both are SETs, etc.?

Otherwise don't you miss most of the benefit on architectures where most
insns clobber the flags register in a PARALLEL with the SET?

Jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNlM1fAAoJEBRtltQi2kC7gcEIAKVH96iqxytC9FJ3DRZE20qr
ftL5Yzatr2Tp2xXMwbF6Fck0+Xgr3dKabru14+Rwn1g8k5IDROaPq8rEZ38yYMrW
jt7Vuffw4z670FGa42Vsj9tR8PrzhOmouI/JGLQMk+eeP6bpOSEDFBx4lRxvBFG3
Xh7U8Z+k81JHZiFTv4ke8Oj2fsfJhHajCJbWvI3kEhuCzq5/Z/mfIaqfukyUglJo
VyQHuSmisE/5H0/6mH4+0RT3FqrrrqprOJib4HZWmS42Dt5kVSjN4oexVUCtbg0V
UuESLZXfpD5wl3/6KqWQtRGO4E57buQz7mB29YNCiI/c1RqsZ60P0NM0jVis/xc=
=XYiI
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope.
  2011-03-31 18:45   ` [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope Tom de Vries
@ 2011-03-31 18:58     ` Jeff Law
  2011-04-05 11:44       ` Tom de Vries
  2011-04-01 14:55     ` Tom de Vries
  1 sibling, 1 reply; 64+ messages in thread
From: Jeff Law @ 2011-03-31 18:58 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/31/11 12:43, Tom de Vries wrote:
> Allows crossjump over fallthru paths.
OK.
jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNlM5KAAoJEBRtltQi2kC7G9wIAL6SEIlXEl5WvOwn2KdobL4w
DI6A/TAitu7jFZ9mMCpumDfSJpoqY471Qxu9rMBKtKyE/JqP1p4onjzh/pDzktz4
atBChVdWxMQq1HWOlzBZV9ue7dsIh1A6gSCkReZwXmQStgMofBNd5zpIGbl6KVV0
MyEhQjbgEYn5sSWx9drLDzWolJnfLQHNACvvlazQNkpZaAEowDLBDtYjr/xyaw60
hCyV4HWkPEVr4AqP6L9us0RmaDeRXWktydSUP4VPObGxgS0ckFKfdeyV9pDGixAw
OsiHFVkp7cs9/gMaj4stFi23wdk+CsCCxUXone3e0EKZKRY/7d2c9DKIbHswcuQ=
=mqyF
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case.
  2011-03-31 18:46   ` [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case Tom de Vries
@ 2011-03-31 19:00     ` Jeff Law
  2011-04-01 14:56     ` Tom de Vries
  1 sibling, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-03-31 19:00 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/31/11 12:44, Tom de Vries wrote:
> Test-case for crossjump-fallthru-ml.patch.
OK.
jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNlM5qAAoJEBRtltQi2kC74ksIAJHPSGtw95AknD0wil+UFHaR
gJtk6oJaaBKAtI1euogaiqvHRr6OaR8OrHS93vGIKZRAG5ji0qs5sEp66dr4v0Em
O/g9US7i0HrTFeNDYWp/2jarhCFbhRLzXRUIKEBw7vlrtUjIE3S7udV8w3yPwtnf
VXKQk79WWsDh2XJyYubznRBeS7zh+fspgLpN7grvduZaFSHwOyrAVQVmVROL5Ylq
U6R4pM7kok5K20/+xi+CfrUTpQean3H1onxmaq2rY7UOR5W1/Ia8Gd6jeMEgKcBL
n/E5vI5CFFam4qa+R4i1kNw6IKWWsAjJTxQSx8C5HvR41hnEAb1w4UD3sNRngFI=
=fGZq
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions.
  2011-03-31 18:56   ` [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions Tom de Vries
@ 2011-03-31 19:02     ` Jeff Law
  2011-04-01 14:56     ` Tom de Vries
  1 sibling, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-03-31 19:02 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/31/11 12:45, Tom de Vries wrote:
> Allow crossjumping in both directions. Crossjump was assumed to be
> symmetric, and therefore only applied on edges e1,e2 and not on e2,e1.
> Now given both the fallthru fix and the regnotes fix, crossjumping is
> not symmetrical anymore, and we allow both directions (but not by
> testing twice).
OK.
jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNlM6qAAoJEBRtltQi2kC7GaEIAJDs2K21ezAc1ubdSXEbbr8Z
Zryi/r6Euzns9e3+heNYYugn33XyVLc5kGiEgM8Ul2QEfg2cY//PAW6nAaFUnqQn
EMlKIUMdGM3ijAXC2AdLQ+4fim1n3y6kxlFyZQOvlyGtm+bp+V+EgGtBt9yA9s5x
y0TopSk2OITup0Vo7KQikt5wvCtspoMRd7qYry9TNMFEZokaAgdaI2B00jK9+UCn
aO9unwBaYoZEiUGNQBW/xYazdOG68IV+Q4Dh+7UAKVQ5aBqoQcZ+olRuy6Dtcrr+
mys+bEMx7T3qT/fTzIAmsH40ZuRTUIat9zAa+5wNWxzr9c6zS7C+UR0pJxiYKmc=
=/cNV
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber.
  2011-03-31 18:40     ` Jeff Law
@ 2011-03-31 19:09       ` Tom de Vries
  2011-03-31 19:19         ` Jeff Law
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 19:09 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches, ebotcazou

Hi Jeff,

On 03/31/2011 08:36 PM, Jeff Law wrote:

> On 03/31/11 12:35, Tom de Vries wrote:
>> Currently uses and clobbers are counted as insns in crossjumping, which
>> can cause undesirable crossjumping.  The patch fixes this.
> OK.  Please install.
> 
> jeff

I don't have an account at sourceware.org. Can I ask you to sponsor my
application?

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber.
  2011-03-31 19:09       ` Tom de Vries
@ 2011-03-31 19:19         ` Jeff Law
  0 siblings, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-03-31 19:19 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 03/31/11 13:02, Tom de Vries wrote:
> Hi Jeff,
> 
> On 03/31/2011 08:36 PM, Jeff Law wrote:
> 
>> On 03/31/11 12:35, Tom de Vries wrote:
>>> Currently uses and clobbers are counted as insns in crossjumping, which
>>> can cause undesirable crossjumping.  The patch fixes this.
>> OK.  Please install.
>>
>> jeff
> 
> I don't have an account at sourceware.org. Can I ask you to sponsor my
> application?
Of course.  Though you'll want an account on gcc.gnu.org -- while
sourceware.org and gcc.gnu.org share a physical machine, most of their
infrastructure is separate...

jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNlNF0AAoJEBRtltQi2kC7SAwH/jK5o8ptAOTXlVpjF1rz6Vly
iUWPjStMShkyhYuq7qZh4g0mOqVc6yibrkiaXFlURYi+LCxgkA7cxmA7CxVeHReM
jl2FJKjQcHO1FJxOELfHY5U8J51sf7x7ccCiaNYau7Wmb0FylEPpWn/9p7Yjar0l
qNx+BmY6NyXRYddQI8pSxI8fl2ishOCi3bOAS4X36N+YxbfjYHkjxgQ3BC0W2uKp
b7hviW8QWaJx/cQr+dyxLQNrHrMZ20Vzur8mIFY1vn4+Ms/VQktLsOaXsX4egVRC
iDLLAsLtGq1xXjhEaaCDfEUK7U5V2ipNMEbiS+NwCu6R3B2oCTeFET6BL2eWTl4=
=VCx+
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920] Improve code-size optimizations
  2011-03-31 18:27 [PATCH, PR43920] Improve code-size optimizations Tom de Vries
                   ` (3 preceding siblings ...)
  2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
@ 2011-03-31 21:16 ` Eric Botcazou
  2011-04-01 15:06   ` Tom de Vries
  4 siblings, 1 reply; 64+ messages in thread
From: Eric Botcazou @ 2011-03-31 21:16 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, Maxim Kuvyrkov, Bernd Schmidt

> Patches will be posted separately.

Always post a ChangeLog entry with a patch.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes.
  2011-03-31 18:56     ` Jeff Law
@ 2011-03-31 21:25       ` Tom de Vries
  2011-04-01 14:54         ` Tom de Vries
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-03-31 21:25 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches, ebotcazou

On 03/31/2011 08:52 PM, Jeff Law wrote:

> On 03/31/11 12:42, Tom de Vries wrote:
>> Uses regnotes to analyze whether we can replace insn a by insn b, even
>> if we cannot replace insn b by insn a. Uses this info in crossjumping.

> Shouldn't this be using single_set rather than digging through PATTERN,
> then verifying both are SETs, etc.?
> 
> Otherwise don't you miss most of the benefit on architectures where most
> insns clobber the flags register in a PARALLEL with the SET?

I see what you mean about missing these insns currently.

I guess I will have to check that the non-SET part of the PARALLEL is
identical between the 2 insns.

I'll update the patch to handle this case.

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-03-31 18:29 ` [PATCH, PR43920, 1/9] ARM specific part Tom de Vries
@ 2011-04-01 14:46   ` Tom de Vries
  2011-04-01 15:19     ` Richard Earnshaw
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:46 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 27 bytes --]

Reposting, with ChangeLog.

[-- Attachment #2: 1_arm-size-branch_cost.patch --]
[-- Type: text/x-patch, Size: 773 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* config/arm/arm.h (BRANCH_COST): Set to 1 for Thumb-2 when optimizing
	for size.

Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h	(revision 293961)
+++ gcc/config/arm/arm.h	(revision 293962)
@@ -2201,7 +2201,8 @@ typedef struct
 /* Try to generate sequences that don't involve branches, we can then use
    conditional instructions */
 #define BRANCH_COST(speed_p, predictable_p) \
-  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
+  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
+		: (optimize > 0 ? 2 : 0))
 \f
 /* Position Independent Code.  */
 /* We decide which register to use based on the compilation options and

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-03-31 18:31 ` [PATCH, PR43920, 2/9] ARM specific part - test case Tom de Vries
@ 2011-04-01 14:47   ` Tom de Vries
  2011-04-01 15:17     ` Tom de Vries
  2011-04-01 15:34     ` Richard Earnshaw
  0 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:47 UTC (permalink / raw)
  To: gcc-patches, richard.earnshaw

[-- Attachment #1: Type: text/plain, Size: 27 bytes --]

Reposting, with ChangeLog.

[-- Attachment #2: 2_arm-size-branch_cost.test.patch --]
[-- Type: text/x-patch, Size: 782 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* gcc/testsuite/gcc.target/arm/pr43920-1.c: New test.

Index: gcc/testsuite/gcc.target/arm/pr43920-1.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-1.c	(revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv7-a -mthumb -Os" } */
+
+int
+f (int start, int end, int *start_)
+{
+  if (start == -1 || end == -1)
+    return -1;
+
+  if (end - start)
+    return -1;
+
+  *start_ = start;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "\torr" 0 } } */
+/* { dg-final { scan-assembler-times "\tit\t" 0 } } */
+/* { dg-final { scan-assembler "\tbeq" } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 3/9] Cleanup.
  2011-03-31 18:35 ` [PATCH, PR43920, 3/9] Cleanup Tom de Vries
  2011-03-31 18:43   ` Jeff Law
@ 2011-04-01 14:48   ` Tom de Vries
  2011-04-01 15:15     ` Tom de Vries
  1 sibling, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:48 UTC (permalink / raw)
  To: gcc-patches, ebotcazou; +Cc: Maxim Kuvyrkov, Bernd Schmidt

[-- Attachment #1: Type: text/plain, Size: 27 bytes --]

Reposting, with ChangeLog.

[-- Attachment #2: 3_crossjump-cleanup-ml.patch --]
[-- Type: text/x-patch, Size: 1405 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* gcc/cfgcleanup.c (try_crossjump_bb): Remove 2 superfluous variables.

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -1961,7 +1961,6 @@
   edge e, e2, fallthru;
   bool changed;
   unsigned max, ix, ix2;
-  basic_block ev, ev2;
 
   /* Nothing to do if there is not at least two incoming edges.  */
   if (EDGE_COUNT (bb->preds) < 2)
@@ -2001,9 +2000,9 @@
   fallthru = find_fallthru_edge (bb->preds);
 
   changed = false;
-  for (ix = 0, ev = bb; ix < EDGE_COUNT (ev->preds); )
+  for (ix = 0; ix < EDGE_COUNT (bb->preds);)
     {
-      e = EDGE_PRED (ev, ix);
+      e = EDGE_PRED (bb, ix);
       ix++;
 
       /* As noted above, first try with the fallthru predecessor (or, a
@@ -2021,7 +2020,6 @@
 	    {
 	      changed = true;
 	      ix = 0;
-	      ev = bb;
 	      continue;
 	    }
 	}
@@ -2045,10 +2043,9 @@
       if (EDGE_SUCC (e->src, 0) != e)
 	continue;
 
-      for (ix2 = 0, ev2 = bb; ix2 < EDGE_COUNT (ev2->preds); )
+      for (ix2 = 0; ix2 < EDGE_COUNT (bb->preds); ix2++)
 	{
-	  e2 = EDGE_PRED (ev2, ix2);
-	  ix2++;
+	  e2 = EDGE_PRED (bb, ix2);
 
 	  if (e2 == e)
 	    continue;
@@ -2071,7 +2068,6 @@
 	  if (try_crossjump_to_edge (mode, e, e2))
 	    {
 	      changed = true;
-	      ev2 = bb;
 	      ix = 0;
 	      break;
 	    }

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber.
  2011-03-31 18:36   ` [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber Tom de Vries
  2011-03-31 18:40     ` Jeff Law
@ 2011-04-01 14:48     ` Tom de Vries
  2011-04-04 19:11       ` Jeff Law
  1 sibling, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:48 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 27 bytes --]

Reposting, with ChangeLog.

[-- Attachment #2: 4_crossjump-dont-count-use-clobber-ml.patch --]
[-- Type: text/x-patch, Size: 895 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* cfgcleanup.c (flow_find_cross_jump): Don't count USE or CLOBBER as
	insn.

Index: gcc/cfgcleanup.c
===================================================================
--- gcc/cfgcleanup.c	(revision 170556)
+++ gcc/cfgcleanup.c	(working copy)
@@ -1074,6 +1074,7 @@ flow_find_cross_jump (basic_block bb1, b
 {
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
+  rtx p1;
 
   /* Skip simple jumps at the end of the blocks.  Complex jumps still
      need to be compared for equivalence, which we'll do below.  */
@@ -1122,7 +1123,9 @@ flow_find_cross_jump (basic_block bb1, b
 
 	  afterlast1 = last1, afterlast2 = last2;
 	  last1 = i1, last2 = i2;
-	  ninsns++;
+	  p1 = PATTERN (i1);
+	  if (!(GET_CODE (p1) == USE || GET_CODE (p1) == CLOBBER))
+            ninsns++;
 	}
 
       i1 = PREV_INSN (i1);

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register.
  2011-03-31 18:42   ` [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register Tom de Vries
  2011-03-31 18:52     ` Jeff Law
@ 2011-04-01 14:49     ` Tom de Vries
  2011-04-04 19:11       ` Jeff Law
  1 sibling, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:49 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 27 bytes --]

Reposting, with ChangeLog.

[-- Attachment #2: 5_crossjump-use-return-register-ml.patch --]
[-- Type: text/x-patch, Size: 1866 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* function.c (emit_use_return_register_into_block): New function.
	(thread_prologue_and_epilogue_insns): Use
	emit_use_return_register_into_block.

Index: gcc/function.c
===================================================================
--- gcc/function.c	(revision 170556)
+++ gcc/function.c	(working copy)
@@ -5241,6 +5241,19 @@ prologue_epilogue_contains (const_rtx in
   return 0;
 }
 
+/* Insert use of return register before the end of BB.  */
+
+static void
+emit_use_return_register_into_block (basic_block bb)
+{
+  rtx seq;
+  start_sequence ();
+  use_return_register ();
+  seq = get_insns ();
+  end_sequence ();
+  emit_insn_before (seq, BB_END (bb));
+}
+
 #ifdef HAVE_return
 /* Insert gen_return at the end of block BB.  This also means updating
    block_for_insn appropriately.  */
@@ -5395,6 +5408,15 @@ thread_prologue_and_epilogue_insns (void
 		 with a simple return instruction.  */
 	      if (simplejump_p (jump))
 		{
+		  /* The use of the return register might be present in the exit
+		     fallthru block.  Either:
+		     - removing the use is safe, and we should remove the use in
+		       the exit fallthru block, or
+		     - removing the use is not safe, and we should add it here.
+		     For now, we conservatively choose the latter.  Either of the
+		     2 helps in crossjumping.  */
+		  emit_use_return_register_into_block (bb);
+
 		  emit_return_into_block (bb);
 		  delete_insn (jump);
 		}
@@ -5409,6 +5431,9 @@ thread_prologue_and_epilogue_insns (void
 		      continue;
 		    }
 
+                  /* See comment in simplejump_p case above.  */
+		  emit_use_return_register_into_block (bb);
+
 		  /* If this block has only one successor, it both jumps
 		     and falls through to the fallthru block, so we can't
 		     delete the edge.  */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes.
  2011-03-31 21:25       ` Tom de Vries
@ 2011-04-01 14:54         ` Tom de Vries
  2011-04-04 16:14           ` Tom de Vries
  2011-04-06 17:41           ` Jeff Law
  0 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:54 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 925 bytes --]

On 03/31/2011 11:16 PM, Tom de Vries wrote:
> On 03/31/2011 08:52 PM, Jeff Law wrote:
> 
>> On 03/31/11 12:42, Tom de Vries wrote:
>>> Uses regnotes to analyze whether we can replace insn a by insn b, even
>>> if we cannot replace insn b by insn a. Uses this info in crossjumping.
> 
>> Shouldn't this be using single_set rather than digging through PATTERN,
>> then verifying both are SETs, etc.?
>>
>> Otherwise don't you miss most of the benefit on architectures where most
>> insns clobber the flags register in a PARALLEL with the SET?
> 
> I see what you mean about missing these insns currently.
> 
> I guess I will have to check that the non-SET part of the PARALLEL is
> identical between the 2 insns.
> 
> I'll update the patch to handle this case.

changes compared to previous posting:
- add ChangeLog.
- use single_set
- add equal_different_set_p and use it in can_replace_by

Retested on x86_64.

Thanks,
- Tom

[-- Attachment #2: 6_crossjump-regnotes-ml.patch --]
[-- Type: text/x-patch, Size: 11553 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* cfgcleanup.c (equal_different_set_p, can_replace_by, merge_dir): New
	functions.
	(old_insns_match_p): Change return type.  Replace return false/true with
	return dir_none/dir_both.  Use can_replace_by.
	(flow_find_cross_jump): Add dir_p parameter.  Init replacement direction
	from dir_p.  Register replacement direction in dir, last_dir and
	afterlast_dir.  Handle new return type of old_insns_match_p using
	merge_dir.  Return replacement direction in dir_p.
	(flow_find_head_matching_sequence, outgoing_edges_match): Handle new
	return type of old_insns_match_p.
	(try_crossjump_to_edge): Add argument to call to flow_find_cross_jump.
	* ifcvt.c (cond_exec_process_if_block): Add argument to call to
	flow_find_cross_jump.
	* basic-block.h (enum replace_direction): New type.
	(flow_find_cross_jump): Add parameter to declaration.

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -72,7 +72,7 @@
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
-static bool old_insns_match_p (int, rtx, rtx);
+static enum replace_direction old_insns_match_p (int, rtx, rtx);
 
 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
 static void merge_blocks_move_successor_nojumps (basic_block, basic_block);
@@ -950,27 +950,143 @@
 }
 
 
+ /* Checks if patterns P1 and P2 are equivalent, apart from the possibly
+    different single sets S1 and S2.  */
+
+static bool
+equal_different_set_p (rtx p1, rtx s1, rtx p2, rtx s2)
+{
+  int i;
+  rtx e1, e2;
+
+  if (p1 == s1 && p2 == s2)
+    return true;
+
+  if (GET_CODE (p1) != PARALLEL || GET_CODE (p2) != PARALLEL)
+    return false;
+
+  if (XVECLEN (p1, 0) != XVECLEN (p2, 0))
+    return false;
+
+  for (i = 0; i < XVECLEN (p1, 0); i++)
+    {
+      e1 = XVECEXP (p1, 0, i);
+      e2 = XVECEXP (p2, 0, i);
+      if (e1 == s1 && e2 == s2)
+        continue;
+      if (reload_completed
+          ? rtx_renumbered_equal_p (e1, e2) : rtx_equal_p (e1, e2))
+        continue;
+
+        return false;
+    }
+
+  return true;
+}
+
+/* Examine register notes on I1 and I2 and return:
+   - dir_forward if I1 can be replaced by I2,
+   - dir_backward if I2 can be replaced by I1,
+   - dir_both if both are the case, or dir_none if neither is.  */
+
+static enum replace_direction
+can_replace_by (rtx i1, rtx i2)
+{
+  rtx s1, s2, d1, d2, src1, src2, note1, note2;
+  bool c1, c2;
+
+  /* Check for 2 sets.  */
+  s1 = single_set (i1);
+  s2 = single_set (i2);
+  if (s1 == NULL_RTX || s2 == NULL_RTX)
+    return dir_none;
+
+  /* Check that the 2 sets set the same dest.  */
+  d1 = SET_DEST (s1);
+  d2 = SET_DEST (s2);
+  if (!(reload_completed
+        ? rtx_renumbered_equal_p (d1, d2) : rtx_equal_p (d1, d2)))
+    return dir_none;
+
+  /* Find identical reg_equiv or reg_equal note, which implies that the 2 sets
+     set dest to the same value.  */
+  note1 = find_reg_equal_equiv_note (i1);
+  note2 = find_reg_equal_equiv_note (i2);
+  if (!note1 || !note2 || !rtx_equal_p (XEXP (note1, 0), XEXP (note2, 0))
+      || !CONST_INT_P (XEXP (note1, 0)))
+    return dir_none;
+
+  if (!equal_different_set_p (PATTERN (i1), s1, PATTERN (i2), s2))
+    return dir_none;
+
+  /* Although the 2 sets set dest to the same value, we cannot replace
+       (set (dest) (const_int))
+     by
+       (set (dest) (reg))
+     because we don't know if the reg is live and has the same value at the
+     location of replacement.  */
+  src1 = SET_SRC (s1);
+  src2 = SET_SRC (s2);
+  c1 = CONST_INT_P (src1);
+  c2 = CONST_INT_P (src2);
+  if (c1 && c2)
+    return dir_both;
+  else if (c2)
+    return dir_forward;
+  else if (c1)
+    return dir_backward;
+
+  return dir_none;
+}
+
+/* Merges directions A and B.  */
+
+static enum replace_direction
+merge_dir (enum replace_direction a, enum replace_direction b)
+{
+  /* Implements the following table:
+        |bo fw bw no
+     ---+-----------
+     bo |bo fw bw no
+     fw |-- fw no no
+     bw |-- -- bw no
+     no |-- -- -- no.  */
+
+  if (a == b)
+    return a;
+
+  if (a == dir_both)
+    return b;
+  if (b == dir_both)
+    return a;
+
+  return dir_none;
+}
+
-/* Return true if I1 and I2 are equivalent and thus can be crossjumped.  */
+/* Examine I1 and I2 and return:
+   - dir_forward if I1 can be replaced by I2,
+   - dir_backward if I2 can be replaced by I1,
+   - dir_both if both are the case, or dir_none if neither is.  */
 
-static bool
+static enum replace_direction
 old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx i1, rtx i2)
 {
   rtx p1, p2;
 
   /* Verify that I1 and I2 are equivalent.  */
   if (GET_CODE (i1) != GET_CODE (i2))
-    return false;
+    return dir_none;
 
   /* __builtin_unreachable() may lead to empty blocks (ending with
      NOTE_INSN_BASIC_BLOCK).  They may be crossjumped. */
   if (NOTE_INSN_BASIC_BLOCK_P (i1) && NOTE_INSN_BASIC_BLOCK_P (i2))
-    return true;
+    return dir_both;
 
   p1 = PATTERN (i1);
   p2 = PATTERN (i2);
 
   if (GET_CODE (p1) != GET_CODE (p2))
-    return false;
+    return dir_none;
 
   /* If this is a CALL_INSN, compare register usage information.
      If we don't check this on stack register machines, the two
@@ -991,15 +1107,15 @@
       rtx n2 = find_reg_note (i2, REG_EH_REGION, 0);
 
       if (!n1 && n2)
-	return false;
+	return dir_none;
 
       if (n1 && (!n2 || XEXP (n1, 0) != XEXP (n2, 0)))
-	return false;
+	return dir_none;
 
       if (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
 			CALL_INSN_FUNCTION_USAGE (i2))
 	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))
-	return false;
+	return dir_none;
     }
 
 #ifdef STACK_REGS
@@ -1028,15 +1144,15 @@
 	  SET_HARD_REG_BIT (i2_regset, REGNO (XEXP (note, 0)));
 
       if (!hard_reg_set_equal_p (i1_regset, i2_regset))
-	return false;
+	return dir_none;
     }
 #endif
 
   if (reload_completed
       ? rtx_renumbered_equal_p (p1, p2) : rtx_equal_p (p1, p2))
-    return true;
+    return dir_both;
 
-  return false;
+  return can_replace_by (i1, i2);
 }
 \f
 /* When comparing insns I1 and I2 in flow_find_cross_jump or
@@ -1063,18 +1179,32 @@
 }
 
 /* Look through the insns at the end of BB1 and BB2 and find the longest
-   sequence that are equivalent.  Store the first insns for that sequence
-   in *F1 and *F2 and return the sequence length.
+   sequence that are either equivalent, or allow forward or backward
+   replacement.  Store the first insns for that sequence in *F1 and *F2 and
+   return the sequence length.
+
+   DIR_P indicates the allowed replacement direction on function entry, and
+   the actual replacement direction on function exit.  If NULL, only equivalent
+   sequences are allowed.
 
    To simplify callers of this function, if the blocks match exactly,
    store the head of the blocks in *F1 and *F2.  */
 
 int
-flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx *f1, rtx *f2)
+flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx *f1, rtx *f2,
+                      enum replace_direction *dir_p)
 {
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
   rtx p1;
+  enum replace_direction dir, last_dir, afterlast_dir;
+
+  if (dir_p)
+    dir = *dir_p;
+  else
+    dir = dir_both;
+  afterlast_dir = dir;
+  last_dir = afterlast_dir;
 
   /* Skip simple jumps at the end of the blocks.  Complex jumps still
      need to be compared for equivalence, which we'll do below.  */
@@ -1111,7 +1241,8 @@
       if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
 	break;
 
-      if (!old_insns_match_p (0, i1, i2))
+      dir = merge_dir (dir, old_insns_match_p (0, i1, i2));
+      if (dir == dir_none || (!dir_p && dir != dir_both))
 	break;
 
       merge_memattrs (i1, i2);
@@ -1123,6 +1254,8 @@
 
 	  afterlast1 = last1, afterlast2 = last2;
 	  last1 = i1, last2 = i2;
+	  afterlast_dir = last_dir;
+	  last_dir = dir;
 	  p1 = PATTERN (i1);
 	  if (!(GET_CODE (p1) == USE || GET_CODE (p1) == CLOBBER))
             ninsns++;
@@ -1136,7 +1269,7 @@
   /* Don't allow the insn after a compare to be shared by
      cross-jumping unless the compare is also shared.  */
   if (ninsns && reg_mentioned_p (cc0_rtx, last1) && ! sets_cc0_p (last1))
-    last1 = afterlast1, last2 = afterlast2, ninsns--;
+    last1 = afterlast1, last2 = afterlast2, last_dir = afterlast_dir, ninsns--;
 #endif
 
   /* Include preceding notes and labels in the cross-jump.  One,
@@ -1162,7 +1295,9 @@
       *f2 = last2;
     }
 
+  if (dir_p)
+    *dir_p = last_dir;
   return ninsns;
 }
 
       /* Ignore notes.  */
@@ -1226,7 +1361,7 @@
 	      && nehedges1 != nehedges2))
 	break;
 
-      if (!old_insns_match_p (0, i1, i2))
+      if (old_insns_match_p (0, i1, i2) != dir_both)
 	break;
 
       merge_memattrs (i1, i2);
@@ -1455,7 +1590,8 @@
 		  rr.update_label_nuses = false;
 		  for_each_rtx (&BB_END (bb1), replace_label, &rr);
 
-		  match = old_insns_match_p (mode, BB_END (bb1), BB_END (bb2));
+		  match = (old_insns_match_p (mode, BB_END (bb1), BB_END (bb2))
+			   == dir_both);
 		  if (dump_file && match)
 		    fprintf (dump_file,
 			     "Tablejumps in bb %i and %i match.\n",
@@ -1477,7 +1613,7 @@
 
   /* First ensure that the instructions match.  There may be many outgoing
      edges so this test is generally cheaper.  */
-  if (!old_insns_match_p (mode, BB_END (bb1), BB_END (bb2)))
+  if (old_insns_match_p (mode, BB_END (bb1), BB_END (bb2)) != dir_both)
     return false;
 
   /* Search the outgoing edges, ensure that the counts do match, find possible
@@ -1578,6 +1714,7 @@
   int nmatch;
   basic_block src1 = e1->src, src2 = e2->src;
   basic_block redirect_to, redirect_from, to_remove;
+  enum replace_direction dir;
   rtx newpos1, newpos2;
   edge s;
   edge_iterator ei;
@@ -1633,7 +1770,8 @@
     return false;
 
   /* ... and part the second.  */
-  nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2);
+  dir = dir_forward;
+  nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2, &dir);
 
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched
--- gcc/ifcvt.c	(revision 170556)
+++ gcc/ifcvt.c	(working copy)
@@ -476,7 +476,8 @@ cond_exec_process_if_block (ce_if_block_
       /* Look for matching sequences at the head and tail of the two blocks,
 	 and limit the range of insns to be converted if possible.  */
       n_matching = flow_find_cross_jump (then_bb, else_bb,
-					 &then_first_tail, &else_first_tail);
+					 &then_first_tail, &else_first_tail,
+                                         NULL);
       if (then_first_tail == BB_HEAD (then_bb))
 	then_start = then_end = NULL_RTX;
       if (else_first_tail == BB_HEAD (else_bb))
--- gcc/basic-block.h	(revision 170556)
+++ gcc/basic-block.h	(working copy)
@@ -803,9 +803,12 @@ extern bool purge_dead_edges (basic_bloc
 extern void find_many_sub_basic_blocks (sbitmap);
 extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
 
+enum replace_direction { dir_none, dir_forward, dir_backward, dir_both };
+
 /* In cfgcleanup.c.  */
 extern bool cleanup_cfg (int);
-extern int flow_find_cross_jump (basic_block, basic_block, rtx *, rtx *);
+extern int flow_find_cross_jump (basic_block, basic_block, rtx *, rtx *,
+                                 enum replace_direction*);
 extern int flow_find_head_matching_sequence (basic_block, basic_block,
 					     rtx *, rtx *, int);
 

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope.
  2011-03-31 18:45   ` [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope Tom de Vries
  2011-03-31 18:58     ` Jeff Law
@ 2011-04-01 14:55     ` Tom de Vries
  2011-04-05 21:45       ` Jeff Law
  1 sibling, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:55 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 28 bytes --]

Reposting, with ChangeLog.


[-- Attachment #2: 7_crossjump-fallthru-ml.patch --]
[-- Type: text/x-patch, Size: 7299 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* cfgcleanup.c (walk_to_nondebug_insn): New function.
	(flow_find_cross_jump): Use walk_to_nondebug_insn.  Recalculate bb1 and
	bb2.
	(try_crossjump_to_edge): Handle case that newpos1 or newpos2 is not src1
	or src2.  Redirect edges to the last basic block.  Update frequency and
	count on multiple basic blocks in case of fallthru.

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -1139,6 +1139,43 @@
     }
 }
 
+ /* Walks from I1 in BB1 backward till the next non-debug insn, and returns the
+    resulting insn in I1, and the corresponding bb in BB1.  At the head of a
+    bb, if there is a predecessor bb that reaches this bb via fallthru, and
+    FOLLOW_FALLTHRU, walks further in the predecessor bb and registers this in
+    DID_FALLTHRU.  Otherwise, stops at the head of the bb.  */
+
+static void
+walk_to_nondebug_insn (rtx *i1, basic_block *bb1, bool follow_fallthru,
+                       bool *did_fallthru)
+{
+  edge fallthru;
+
+  *did_fallthru = false;
+
+  /* Ignore notes.  */
+  while (!NONDEBUG_INSN_P (*i1))
+    {
+      if (*i1 != BB_HEAD (*bb1))
+        {
+          *i1 = PREV_INSN (*i1);
+          continue;
+        }
+
+      if (!follow_fallthru)
+        return;
+
+      fallthru = find_fallthru_edge ((*bb1)->preds);
+      if (!fallthru || fallthru->src == ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
+          || !single_succ_p (fallthru->src))
+        return;
+
+      *bb1 = fallthru->src;
+      *i1 = BB_END (*bb1);
+      *did_fallthru = true;
+     }
+}
+
 /* Look through the insns at the end of BB1 and BB2 and find the longest
    sequence that are equivalent.  Store the first insns for that sequence
    in *F1 and *F2 and return the sequence length.
@@ -1153,6 +1190,7 @@
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
   enum replace_direction dir, last_dir, afterlast_dir;
+  bool follow_fallthru, did_fallthru;
 
   if (dir_p)
     dir = *dir_p;
@@ -1187,11 +1225,30 @@
   while (true)
     {
-      /* Ignore notes.  */
-      while (!NONDEBUG_INSN_P (i1) && i1 != BB_HEAD (bb1))
-	i1 = PREV_INSN (i1);
-
-      while (!NONDEBUG_INSN_P (i2) && i2 != BB_HEAD (bb2))
-	i2 = PREV_INSN (i2);
+      /* In the following example, we can replace all jumps to C by jumps to A.
+
+         This removes 4 duplicate insns.
+         [bb A] insn1            [bb C] insn1
+                insn2                   insn2
+         [bb B] insn3                   insn3
+                insn4                   insn4
+                jump_insn               jump_insn
+
+         We could also replace all jumps to A by jumps to C, but that leaves B
+         alive, and removes only 2 duplicate insns.  In a subsequent crossjump
+         step, all jumps to B would be replaced with jumps to the middle of C,
+         achieving the same result with more effort.
+         So we allow only the first possibility, which means that we don't allow
+         fallthru in the block that's being replaced.  */
+
+      follow_fallthru = dir_p && dir != dir_forward;
+      walk_to_nondebug_insn (&i1, &bb1, follow_fallthru, &did_fallthru);
+      if (did_fallthru)
+        dir = dir_backward;
+
+      follow_fallthru = dir_p && dir != dir_backward;
+      walk_to_nondebug_insn (&i2, &bb2, follow_fallthru, &did_fallthru);
+      if (did_fallthru)
+        dir = dir_forward;
 
       if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
 	break;
@@ -1230,12 +1287,14 @@
      Two, it keeps line number notes as matched as may be.  */
   if (ninsns)
     {
+      bb1 = BLOCK_FOR_INSN (last1);
       while (last1 != BB_HEAD (bb1) && !NONDEBUG_INSN_P (PREV_INSN (last1)))
 	last1 = PREV_INSN (last1);
 
       if (last1 != BB_HEAD (bb1) && LABEL_P (PREV_INSN (last1)))
 	last1 = PREV_INSN (last1);
 
+      bb2 = BLOCK_FOR_INSN (last2);
       while (last2 != BB_HEAD (bb2) && !NONDEBUG_INSN_P (PREV_INSN (last2)))
 	last2 = PREV_INSN (last2);
 
@@ -1659,6 +1718,7 @@
   int nmatch;
   basic_block src1 = e1->src, src2 = e2->src;
   basic_block redirect_to, redirect_from, to_remove;
+  basic_block osrc1, osrc2, redirect_edges_to, tmp;
   enum replace_direction dir;
   rtx newpos1, newpos2;
   edge s;
@@ -1720,8 +1780,15 @@
     return false;
 
   /* ... and part the second.  */
   dir = dir_forward;
   nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2, &dir);
+
+  osrc1 = src1;
+  osrc2 = src2;
+  if (newpos1 != NULL_RTX)
+    src1 = BLOCK_FOR_INSN (newpos1);
+  if (newpos2 != NULL_RTX)
+    src2 = BLOCK_FOR_INSN (newpos2);
 
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched
@@ -1745,8 +1812,8 @@
       rtx label1, label2;
       rtx table1, table2;
 
-      if (tablejump_p (BB_END (src1), &label1, &table1)
-	  && tablejump_p (BB_END (src2), &label2, &table2)
+      if (tablejump_p (BB_END (osrc1), &label1, &table1)
+	  && tablejump_p (BB_END (osrc2), &label2, &table2)
 	  && label1 != label2)
 	{
 	  replace_label_data rr;
@@ -1761,7 +1828,7 @@
 	      /* Do not replace the label in SRC1->END because when deleting
 		 a block whose end is a tablejump, the tablejump referenced
 		 from the instruction is deleted too.  */
-	      if (insn != BB_END (src1))
+	      if (insn != BB_END (osrc1))
 		for_each_rtx (&insn, replace_label, &rr);
 	    }
 	}
@@ -1802,8 +1869,13 @@
   /* We may have some registers visible through the block.  */
   df_set_bb_dirty (redirect_to);
 
+  if (osrc2 == src2)
+    redirect_edges_to = redirect_to;
+  else
+    redirect_edges_to = osrc2;
+
   /* Recompute the frequencies and counts of outgoing edges.  */
-  FOR_EACH_EDGE (s, ei, redirect_to->succs)
+  FOR_EACH_EDGE (s, ei, redirect_edges_to->succs)
     {
       edge s2;
       edge_iterator ei;
@@ -1846,24 +1918,32 @@
 	    s2->dest->count = 0;
 	}
 
-      if (!redirect_to->frequency && !src1->frequency)
+      if (!redirect_edges_to->frequency && !src1->frequency)
 	s->probability = (s->probability + s2->probability) / 2;
       else
 	s->probability
-	  = ((s->probability * redirect_to->frequency +
+	  = ((s->probability * redirect_edges_to->frequency +
 	      s2->probability * src1->frequency)
-	     / (redirect_to->frequency + src1->frequency));
+	     / (redirect_edges_to->frequency + src1->frequency));
     }
 
   /* Adjust count and frequency for the block.  An earlier jump
      threading pass may have left the profile in an inconsistent
      state (see update_bb_profile_for_threading) so we must be
      prepared for overflows.  */
-  redirect_to->count += src1->count;
-  redirect_to->frequency += src1->frequency;
-  if (redirect_to->frequency > BB_FREQ_MAX)
-    redirect_to->frequency = BB_FREQ_MAX;
-  update_br_prob_note (redirect_to);
+  tmp = redirect_to;
+  do
+    {
+      tmp->count += src1->count;
+      tmp->frequency += src1->frequency;
+      if (tmp->frequency > BB_FREQ_MAX)
+        tmp->frequency = BB_FREQ_MAX;
+      if (tmp == redirect_edges_to)
+        break;
+      tmp = find_fallthru_edge (tmp->succs)->dest;
+    }
+  while (true);
+  update_br_prob_note (redirect_edges_to);
 
   /* Edit SRC1 to go to REDIRECT_TO at NEWPOS1.  */
 

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case.
  2011-03-31 18:46   ` [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case Tom de Vries
  2011-03-31 19:00     ` Jeff Law
@ 2011-04-01 14:56     ` Tom de Vries
  2011-04-01 15:01       ` Jakub Jelinek
  1 sibling, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:56 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 27 bytes --]

Reposting, with ChangeLog.

[-- Attachment #2: 8_crossjump-fallthru-ml.test.patch --]
[-- Type: text/x-patch, Size: 982 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* testsuite/gcc.target/arm/pr43920-2.c: New test.

Index: gcc/testsuite/gcc.target/arm/pr43920-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv7-a -mthumb -Os" } */
+
+#include <stdio.h>
+
+int getFileStartAndLength (int fd, int *start_, size_t *length_)
+{
+      int start, end;
+      size_t length;
+
+      start = lseek (fd, 0L, SEEK_CUR);
+      end = lseek (fd, 0L, SEEK_END);
+
+      if (start == -1 || end == -1)
+         return -1;
+
+      length = end - start;
+      if (length == 0)
+         return -1;
+
+      *start_ = start;
+      *length_ = length;
+
+      return 0;
+}
+
+/* { dg-final { scan-assembler-times "pop" 2 } } */
+/* { dg-final { scan-assembler-times "beq" 3 } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions.
  2011-03-31 18:56   ` [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions Tom de Vries
  2011-03-31 19:02     ` Jeff Law
@ 2011-04-01 14:56     ` Tom de Vries
  2011-04-05 21:46       ` Jeff Law
  1 sibling, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 14:56 UTC (permalink / raw)
  To: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 27 bytes --]

Reposting, with ChangeLog.

[-- Attachment #2: 9_crossjump-backward-ml.patch --]
[-- Type: text/x-patch, Size: 2985 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* cfgcleanup.c (try_crossjump_to_edge): Add dir parameter.  Pass dir to
	flow_find_cross_jump.  Swap variables to implement backward replacement.
	(try_crossjump_bb): Add argument to try_crossjump_to_edge.

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -69,7 +69,7 @@
    information; we should run df_analyze to enable more opportunities.  */
 static bool block_was_dirty;
 
-static bool try_crossjump_to_edge (int, edge, edge);
+static bool try_crossjump_to_edge (int, edge, edge, enum replace_direction);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
 static enum replace_direction old_insns_match_p (int, rtx, rtx);
@@ -1695,15 +1695,17 @@
 /* E1 and E2 are edges with the same destination block.  Search their
    predecessors for common code.  If found, redirect control flow from
-   (maybe the middle of) E1->SRC to (maybe the middle of) E2->SRC.  */
+   (maybe the middle of) E1->SRC to (maybe the middle of) E2->SRC (dir_forward),
+   or the other way around (dir_backward).  DIR specifies the allowed
+   replacement direction.  */
 
 static bool
-try_crossjump_to_edge (int mode, edge e1, edge e2)
+try_crossjump_to_edge (int mode, edge e1, edge e2,
+                       enum replace_direction dir)
 {
   int nmatch;
   basic_block src1 = e1->src, src2 = e2->src;
   basic_block redirect_to, redirect_from, to_remove;
   basic_block osrc1, osrc2, redirect_edges_to, tmp;
-  enum replace_direction dir;
   rtx newpos1, newpos2;
   edge s;
   edge_iterator ei;
@@ -1757,8 +1759,7 @@
     return false;
 
   /* ... and part the second.  */
-  dir = dir_forward;
   nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2, &dir);
 
   osrc1 = src1;
   osrc2 = src2;
@@ -1767,5 +1768,15 @@
   if (newpos2 != NULL_RTX)
     src2 = BLOCK_FOR_INSN (newpos2);
 
+  if (dir == dir_backward)
+    {
+#define SWAP(T, X, Y) do { T tmp = (X); (X) = (Y); (Y) = tmp; } while (0)
+      SWAP (basic_block, osrc1, osrc2);
+      SWAP (basic_block, src1, src2);
+      SWAP (edge, e1, e2);
+      SWAP (rtx, newpos1, newpos2);
+#undef SWAP
+    }
+
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched
@@ -2020,7 +2031,7 @@
 		   || (fallthru->src->flags & BB_MODIFIED)))
 	    continue;
 
-	  if (try_crossjump_to_edge (mode, e, fallthru))
+	  if (try_crossjump_to_edge (mode, e, fallthru, dir_forward))
 	    {
 	      changed = true;
 	      ix = 0;
@@ -2068,7 +2079,9 @@
 		   || (e2->src->flags & BB_MODIFIED)))
 	    continue;
 
-	  if (try_crossjump_to_edge (mode, e, e2))
+	  /* Neither e nor e2 is a fallthru edge, so we can crossjump in either
+	     direction.  */
+	  if (try_crossjump_to_edge (mode, e, e2, dir_both))
 	    {
 	      changed = true;
 	      ix = 0;

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case.
  2011-04-01 14:56     ` Tom de Vries
@ 2011-04-01 15:01       ` Jakub Jelinek
  2011-04-01 15:18         ` Tom de Vries
  0 siblings, 1 reply; 64+ messages in thread
From: Jakub Jelinek @ 2011-04-01 15:01 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

On Fri, Apr 01, 2011 at 04:56:10PM +0200, Tom de Vries wrote:
> Reposting, with ChangeLog.

> 2011-04-01  Tom de Vries  <tom@codesourcery.com>
> 
> 	PR target/43920
> 	* testsuite/gcc.target/arm/pr43920-2.c: New test.

gcc/testsuite/ has its own ChangeLog, so the ChangeLog entry
should say just
	* gcc.target/arm/pr43920-2.c: New test.
Similarly one of your patches had
	* gcc/cfgcleanup.c (whatever): Whatever.
in the ChangeLog, should be just
	* cfgcleanup.c (whatever): Whatever.

	Jakub

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920] Improve code-size optimizations
  2011-03-31 21:16 ` [PATCH, PR43920] Improve code-size optimizations Eric Botcazou
@ 2011-04-01 15:06   ` Tom de Vries
  2011-04-01 16:06     ` Eric Botcazou
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 15:06 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Maxim Kuvyrkov, Bernd Schmidt

Hi Eric,

On 03/31/2011 11:02 PM, Eric Botcazou wrote:
>> Patches will be posted separately.
> 
> Always post a ChangeLog entry with a patch.
> 

Sorry about that. I reposted the patches with ChangeLog entries.

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 3/9] Cleanup.
  2011-04-01 14:48   ` Tom de Vries
@ 2011-04-01 15:15     ` Tom de Vries
  2011-04-04 18:26       ` Jeff Law
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 15:15 UTC (permalink / raw)
  To: gcc-patches, ebotcazou; +Cc: Maxim Kuvyrkov, Bernd Schmidt

[-- Attachment #1: Type: text/plain, Size: 340 bytes --]

On 04/01/2011 05:01 PM, Jakub Jelinek wrote:
> On Fri, Apr 01, 2011 at 04:56:10PM +0200, Tom de Vries wrote:
>> Reposting, with ChangeLog.
>
> Similarly one of your patches had
> 	* gcc/cfgcleanup.c (whatever): Whatever.
> in the ChangeLog, should be just
> 	* cfgcleanup.c (whatever): Whatever.
>
> 	Jakub

Fixed ChangeLog.

Thanks,
- Tom

[-- Attachment #2: 3_crossjump-cleanup-ml.patch --]
[-- Type: text/x-patch, Size: 1401 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* cfgcleanup.c (try_crossjump_bb): Remove 2 superfluous variables.

diff -u gcc/cfgcleanup.c gcc/cfgcleanup.c
--- gcc/cfgcleanup.c	(working copy)
+++ gcc/cfgcleanup.c	(working copy)
@@ -1961,7 +1961,6 @@
   edge e, e2, fallthru;
   bool changed;
   unsigned max, ix, ix2;
-  basic_block ev, ev2;
 
   /* Nothing to do if there is not at least two incoming edges.  */
   if (EDGE_COUNT (bb->preds) < 2)
@@ -2001,9 +2000,9 @@
   fallthru = find_fallthru_edge (bb->preds);
 
   changed = false;
-  for (ix = 0, ev = bb; ix < EDGE_COUNT (ev->preds); )
+  for (ix = 0; ix < EDGE_COUNT (bb->preds);)
     {
-      e = EDGE_PRED (ev, ix);
+      e = EDGE_PRED (bb, ix);
       ix++;
 
       /* As noted above, first try with the fallthru predecessor (or, a
@@ -2021,7 +2020,6 @@
 	    {
 	      changed = true;
 	      ix = 0;
-	      ev = bb;
 	      continue;
 	    }
 	}
@@ -2045,10 +2043,9 @@
       if (EDGE_SUCC (e->src, 0) != e)
 	continue;
 
-      for (ix2 = 0, ev2 = bb; ix2 < EDGE_COUNT (ev2->preds); )
+      for (ix2 = 0; ix2 < EDGE_COUNT (bb->preds); ix2++)
 	{
-	  e2 = EDGE_PRED (ev2, ix2);
-	  ix2++;
+	  e2 = EDGE_PRED (bb, ix2);
 
 	  if (e2 == e)
 	    continue;
@@ -2071,7 +2068,6 @@
 	  if (try_crossjump_to_edge (mode, e, e2))
 	    {
 	      changed = true;
-	      ev2 = bb;
 	      ix = 0;
 	      break;
 	    }

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-04-01 14:47   ` Tom de Vries
@ 2011-04-01 15:17     ` Tom de Vries
  2011-04-01 15:34     ` Richard Earnshaw
  1 sibling, 0 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 15:17 UTC (permalink / raw)
  To: gcc-patches, richard.earnshaw

[-- Attachment #1: Type: text/plain, Size: 309 bytes --]

On 04/01/2011 05:01 PM, Jakub Jelinek wrote:
> On Fri, Apr 01, 2011 at 04:56:10PM +0200, Tom de Vries wrote:
>> Reposting, with ChangeLog.
>
> gcc/testsuite/ has its own ChangeLog, so the ChangeLog entry
> should say just
> 	* gcc.target/arm/pr43920-2.c: New test.
>
> 	Jakub

Fixed ChangeLog.

Thanks,
- Tom

[-- Attachment #2: 2_arm-size-branch_cost.test.patch --]
[-- Type: text/x-patch, Size: 768 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* gcc.target/arm/pr43920-1.c: New test.

Index: gcc/testsuite/gcc.target/arm/pr43920-1.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-1.c	(revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv7-a -mthumb -Os" } */
+
+int
+f (int start, int end, int *start_)
+{
+  if (start == -1 || end == -1)
+    return -1;
+
+  if (end - start)
+    return -1;
+
+  *start_ = start;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "\torr" 0 } } */
+/* { dg-final { scan-assembler-times "\tit\t" 0 } } */
+/* { dg-final { scan-assembler "\tbeq" } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case.
  2011-04-01 15:01       ` Jakub Jelinek
@ 2011-04-01 15:18         ` Tom de Vries
  2011-04-01 16:14           ` Tom de Vries
  2011-04-05 21:46           ` Jeff Law
  0 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 15:18 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 443 bytes --]

On 04/01/2011 05:01 PM, Jakub Jelinek wrote:
> On Fri, Apr 01, 2011 at 04:56:10PM +0200, Tom de Vries wrote:
>> Reposting, with ChangeLog.
> 
>> 2011-04-01  Tom de Vries  <tom@codesourcery.com>
>>
>> 	PR target/43920
>> 	* testsuite/gcc.target/arm/pr43920-2.c: New test.
> 
> gcc/testsuite/ has its own ChangeLog, so the ChangeLog entry
> should say just
> 	* gcc.target/arm/pr43920-2.c: New test.
> 
> 	Jakub

Fixed ChangeLog.

Thanks,
- Tom

[-- Attachment #2: 8_crossjump-fallthru-ml.test.patch --]
[-- Type: text/x-patch, Size: 972 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* gcc.target/arm/pr43920-2.c: New test.

Index: gcc/testsuite/gcc.target/arm/pr43920-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv7-a -mthumb -Os" } */
+
+#include <stdio.h>
+
+int getFileStartAndLength (int fd, int *start_, size_t *length_)
+{
+      int start, end;
+      size_t length;
+
+      start = lseek (fd, 0L, SEEK_CUR);
+      end = lseek (fd, 0L, SEEK_END);
+
+      if (start == -1 || end == -1)
+         return -1;
+
+      length = end - start;
+      if (length == 0)
+         return -1;
+
+      *start_ = start;
+      *length_ = length;
+
+      return 0;
+}
+
+/* { dg-final { scan-assembler-times "pop" 2 } } */
+/* { dg-final { scan-assembler-times "beq" 3 } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-01 14:46   ` Tom de Vries
@ 2011-04-01 15:19     ` Richard Earnshaw
  2011-04-01 16:06       ` Tom de Vries
  0 siblings, 1 reply; 64+ messages in thread
From: Richard Earnshaw @ 2011-04-01 15:19 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches


On Fri, 2011-04-01 at 16:45 +0200, Tom de Vries wrote:
> Reposting, with ChangeLog.

 #define BRANCH_COST(speed_p, predictable_p) \
-  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
+  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
+               : (optimize > 0 ? 2 : 0))

Don't use optimize_size here, use !speed_p.

Otherwise OK.

R.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-04-01 14:47   ` Tom de Vries
  2011-04-01 15:17     ` Tom de Vries
@ 2011-04-01 15:34     ` Richard Earnshaw
  2011-04-01 16:10       ` Tom de Vries
  1 sibling, 1 reply; 64+ messages in thread
From: Richard Earnshaw @ 2011-04-01 15:34 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches

On Fri, 2011-04-01 at 16:47 +0200, Tom de Vries wrote:
> Reposting, with ChangeLog.

+/* { dg-options "-march=armv7-a -mthumb -Os" } */

No, use dg-require-effective-target.  The above doesn't work properly
with multilib testing.

R.



^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920] Improve code-size optimizations
  2011-04-01 15:06   ` Tom de Vries
@ 2011-04-01 16:06     ` Eric Botcazou
  0 siblings, 0 replies; 64+ messages in thread
From: Eric Botcazou @ 2011-04-01 16:06 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, Maxim Kuvyrkov, Bernd Schmidt

> Sorry about that. I reposted the patches with ChangeLog entries.

Thanks!

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-01 15:19     ` Richard Earnshaw
@ 2011-04-01 16:06       ` Tom de Vries
  2011-04-02  7:47         ` Richard Guenther
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 16:06 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 470 bytes --]

On 04/01/2011 05:18 PM, Richard Earnshaw wrote:
> 
> On Fri, 2011-04-01 at 16:45 +0200, Tom de Vries wrote:
>> Reposting, with ChangeLog.
> 
>  #define BRANCH_COST(speed_p, predictable_p) \
> -  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
> +  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
> +               : (optimize > 0 ? 2 : 0))
> 
> Don't use optimize_size here, use !speed_p.
> 
> Otherwise OK.
> 

Replaced optimize_size by !speed_p.

Thanks,
- Tom

[-- Attachment #2: 1_arm-size-branch_cost.patch --]
[-- Type: text/x-patch, Size: 768 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* config/arm/arm.h (BRANCH_COST): Set to 1 for Thumb-2 when optimizing
	for size.

Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h	(revision 293961)
+++ gcc/config/arm/arm.h	(revision 293962)
@@ -2201,7 +2201,8 @@ typedef struct
 /* Try to generate sequences that don't involve branches, we can then use
    conditional instructions */
 #define BRANCH_COST(speed_p, predictable_p) \
-  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
+  (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \
+		: (optimize > 0 ? 2 : 0))
 \f
 /* Position Independent Code.  */
 /* We decide which register to use based on the compilation options and

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-04-01 15:34     ` Richard Earnshaw
@ 2011-04-01 16:10       ` Tom de Vries
  2011-04-05 10:43         ` Tom de Vries
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 16:10 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 418 bytes --]

On 04/01/2011 05:34 PM, Richard Earnshaw wrote:
> On Fri, 2011-04-01 at 16:47 +0200, Tom de Vries wrote:
>> Reposting, with ChangeLog.
> 
> +/* { dg-options "-march=armv7-a -mthumb -Os" } */
> 
> No, use dg-require-effective-target.  The above doesn't work properly
> with multilib testing.
> 

Changed it into:

+/* { dg-options "-mthumb -Os" }  */
+/* { dg-require-effective-target arm_thumb2_ok } */

Thanks,
- Tom

[-- Attachment #2: 2_arm-size-branch_cost.test.patch --]
[-- Type: text/x-patch, Size: 807 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* gcc.target/arm/pr43920-1.c: New test.

Index: gcc/testsuite/gcc.target/arm/pr43920-1.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-1.c	(revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mthumb -Os" }  */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+f (int start, int end, int *start_)
+{
+  if (start == -1 || end == -1)
+    return -1;
+
+  if (end - start)
+    return -1;
+
+  *start_ = start;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "\torr" 0 } } */
+/* { dg-final { scan-assembler-times "\tit\t" 0 } } */
+/* { dg-final { scan-assembler "\tbeq" } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case.
  2011-04-01 15:18         ` Tom de Vries
@ 2011-04-01 16:14           ` Tom de Vries
  2011-04-05 21:46           ` Jeff Law
  1 sibling, 0 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-01 16:14 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, ebotcazou

[-- Attachment #1: Type: text/plain, Size: 414 bytes --]

On 04/01/2011 05:34 PM, Richard Earnshaw wrote:
> On Fri, 2011-04-01 at 16:47 +0200, Tom de Vries wrote:
>> Reposting, with ChangeLog.
>
> +/* { dg-options "-march=armv7-a -mthumb -Os" } */
>
> No, use dg-require-effective-target.  The above doesn't work properly
> with multilib testing.
>

Changed it into:

+/* { dg-options "-mthumb -Os" }  */
+/* { dg-require-effective-target arm_thumb2_ok } */

Thanks,
- Tom

[-- Attachment #2: 8_crossjump-fallthru-ml.test.patch --]
[-- Type: text/x-patch, Size: 1011 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* gcc.target/arm/pr43920-2.c: New test.

Index: gcc/testsuite/gcc.target/arm/pr43920-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mthumb -Os" }  */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+#include <stdio.h>
+
+int getFileStartAndLength (int fd, int *start_, size_t *length_)
+{
+      int start, end;
+      size_t length;
+
+      start = lseek (fd, 0L, SEEK_CUR);
+      end = lseek (fd, 0L, SEEK_END);
+
+      if (start == -1 || end == -1)
+         return -1;
+
+      length = end - start;
+      if (length == 0)
+         return -1;
+
+      *start_ = start;
+      *length_ = length;
+
+      return 0;
+}
+
+/* { dg-final { scan-assembler-times "pop" 2 } } */
+/* { dg-final { scan-assembler-times "beq" 3 } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-01 16:06       ` Tom de Vries
@ 2011-04-02  7:47         ` Richard Guenther
  2011-04-02 17:06           ` Tom de Vries
  0 siblings, 1 reply; 64+ messages in thread
From: Richard Guenther @ 2011-04-02  7:47 UTC (permalink / raw)
  To: Tom de Vries; +Cc: Richard Earnshaw, gcc-patches

On Fri, Apr 1, 2011 at 6:06 PM, Tom de Vries <vries@codesourcery.com> wrote:
> On 04/01/2011 05:18 PM, Richard Earnshaw wrote:
>>
>> On Fri, 2011-04-01 at 16:45 +0200, Tom de Vries wrote:
>>> Reposting, with ChangeLog.
>>
>>  #define BRANCH_COST(speed_p, predictable_p) \
>> -  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
>> +  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
>> +               : (optimize > 0 ? 2 : 0))
>>
>> Don't use optimize_size here, use !speed_p.
>>
>> Otherwise OK.
>>
>
> Replaced optimize_size by !speed_p.

I wonder if we can add a code-size test harness.  Using GNU size
for example, if available, and a new dg-final { object-size SIZE } that
fails when the size is greater than the specified one (of course all
object-size tests with specific target restrictions).

I would have started on this myself, but my TCL-fu causes me to jump
off such tasks very quickly ;)

Richard.

> Thanks,
> - Tom
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-02  7:47         ` Richard Guenther
@ 2011-04-02 17:06           ` Tom de Vries
  2011-04-03  7:38             ` Richard Guenther
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-02 17:06 UTC (permalink / raw)
  To: Richard Guenther; +Cc: Richard Earnshaw, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 907 bytes --]

On 04/02/2011 09:47 AM, Richard Guenther wrote:
> On Fri, Apr 1, 2011 at 6:06 PM, Tom de Vries <vries@codesourcery.com> wrote:
>> On 04/01/2011 05:18 PM, Richard Earnshaw wrote:
>>>
>>> On Fri, 2011-04-01 at 16:45 +0200, Tom de Vries wrote:
>>>> Reposting, with ChangeLog.
>>>
>>>  #define BRANCH_COST(speed_p, predictable_p) \
>>> -  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
>>> +  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
>>> +               : (optimize > 0 ? 2 : 0))
>>>
>>> Don't use optimize_size here, use !speed_p.
>>>
>>> Otherwise OK.
>>>
>>
>> Replaced optimize_size by !speed_p.
> 
> I wonder if we can add a code-size test harness.  Using GNU size
> for example, if available, and a new dg-final { object-size SIZE } that
> fails when the size is greater than the specified one (of course all
> object-size tests with specific target restrictions).

like this?

Thanks,
- Tom

[-- Attachment #2: 8_crossjump-fallthru-ml.test.patch --]
[-- Type: text/x-patch, Size: 3661 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* lib/scanasm.exp (object-size): New proc.
	* gcc.target/arm/pr43920-2.c: New test.

Index: gcc/testsuite/lib/scanasm.exp
===================================================================
--- gcc/testsuite/lib/scanasm.exp	(revision 170556)
+++ gcc/testsuite/lib/scanasm.exp	(working copy)
@@ -315,6 +315,83 @@ proc scan-assembler-dem-not { args } {
     }
 }
 
+# Call pass if object size is ok, otherwise fail.
+# example: /* { dg-final { object-size text <= 54 } } */
+proc object-size { args } {
+    global size
+    global base_dir
+
+    if { [llength $args] < 3 } {
+	error "object-size: too few arguments"
+        return
+    }
+    if { [llength $args] > 4 } {
+	error "object-size: too many arguments"
+	return
+    }
+    if { [llength $args] >= 4 } {
+	switch [dg-process-target [lindex $args 1]] {
+	    "S" { }
+	    "N" { return }
+	    "F" { setup_xfail "*-*-*" }
+	    "P" { }
+	}
+    }
+
+    # Find size like we find g++ in g++.exp.
+    if ![info exists size]  {
+	set size [findfile $base_dir/../../../binutils/size \
+		  $base_dir/../../../binutils/size \
+		  [findfile $base_dir/../../size $base_dir/../../size \
+		   [findfile $base_dir/size $base_dir/size \
+		    [transform size]]]]
+	verbose -log "size is $size"
+    }
+
+    upvar 2 name testcase
+    set testcase [lindex $testcase 0]
+    set output_file "[file rootname [file tail $testcase]].o"
+    set output [remote_exec host "$size" "$output_file"]
+    set text [lindex $output 1]
+    set status [lindex $output 0]
+    if { $status != 0 } {
+        error "object-size: $size failed"
+        return
+    }
+
+    set what [lindex $args 0]
+    switch $what {
+        total { set where 9 }
+        bss   { set where 8 }
+        data  { set where 7 }
+        text  { set where 6 }
+        default {
+            error "object-size: illegal argument: $what"
+            return
+        }
+    }
+    set actual [lindex $text $where]
+    verbose -log "$what size is $actual"
+
+    set cmp [lindex $args 1]
+    if { [lsearch { < > <= >= == != } $cmp] == -1 } {
+        error "object-size: illegal argument: $cmp"
+        return
+    }
+
+    set with [lindex $args 2]
+    if { ![string is integer $with ] } {
+        error "object-size: illegal argument: $with"
+        return
+    }
+
+    if { [expr $actual $cmp $with] } {
+	pass "$testcase object-size $what $cmp $with"
+    } else {
+	fail "$testcase object-size $what $cmp $with"
+    }
+}
+
 # Utility for testing that a function is defined on the current line.
 # Call pass if so, otherwise fail.  Invoked directly; the file must
 # have been compiled with -g -dA.
Index: gcc/testsuite/gcc.target/arm/pr43920-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
@@ -0,0 +1,30 @@
+/* { dg-do assemble } */
+/* { dg-options "-mthumb -Os -save-temps" }  */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+#include <stdio.h>
+
+int getFileStartAndLength (int fd, int *start_, size_t *length_)
+{
+      int start, end;
+      size_t length;
+
+      start = lseek (fd, 0L, SEEK_CUR);
+      end = lseek (fd, 0L, SEEK_END);
+
+      if (start == -1 || end == -1)
+         return -1;
+
+      length = end - start;
+      if (length == 0)
+         return -1;
+
+      *start_ = start;
+      *length_ = length;
+
+      return 0;
+}
+
+/* { dg-final { scan-assembler-times "pop" 2 } } */
+/* { dg-final { scan-assembler-times "beq" 3 } } */
+/* { dg-final { object-size text <= 54 } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-02 17:06           ` Tom de Vries
@ 2011-04-03  7:38             ` Richard Guenther
  2011-04-03 17:03               ` Tom de Vries
  2011-04-03 18:40               ` Mike Stump
  0 siblings, 2 replies; 64+ messages in thread
From: Richard Guenther @ 2011-04-03  7:38 UTC (permalink / raw)
  To: Tom de Vries; +Cc: Richard Earnshaw, gcc-patches, Mike Stump, Rainer Orth

On Sat, Apr 2, 2011 at 7:05 PM, Tom de Vries <vries@codesourcery.com> wrote:
> On 04/02/2011 09:47 AM, Richard Guenther wrote:
>> On Fri, Apr 1, 2011 at 6:06 PM, Tom de Vries <vries@codesourcery.com> wrote:
>>> On 04/01/2011 05:18 PM, Richard Earnshaw wrote:
>>>>
>>>> On Fri, 2011-04-01 at 16:45 +0200, Tom de Vries wrote:
>>>>> Reposting, with ChangeLog.
>>>>
>>>>  #define BRANCH_COST(speed_p, predictable_p) \
>>>> -  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
>>>> +  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
>>>> +               : (optimize > 0 ? 2 : 0))
>>>>
>>>> Don't use optimize_size here, use !speed_p.
>>>>
>>>> Otherwise OK.
>>>>
>>>
>>> Replaced optimize_size by !speed_p.
>>
>> I wonder if we can add a code-size test harness.  Using GNU size
>> for example, if available, and a new dg-final { object-size SIZE } that
>> fails when the size is greater than the specified one (of course all
>> object-size tests with specific target restrictions).
>
> like this?

Yes!

I'm not sure finding the size binary is ok, and maybe we need to
verify that size output actually matches our expectation.  Other
than that it's exactly what I meant.

Mike?  Rainer?

Thanks,
Richard.

> Thanks,
> - Tom
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-03  7:38             ` Richard Guenther
@ 2011-04-03 17:03               ` Tom de Vries
       [not found]                 ` <BANLkTikEruAfGJ392FXtasLv6-yV2tYSRQ@mail.gmail.com>
  2011-04-03 18:40               ` Mike Stump
  1 sibling, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-03 17:03 UTC (permalink / raw)
  To: Richard Guenther; +Cc: Richard Earnshaw, gcc-patches, Mike Stump, Rainer Orth

[-- Attachment #1: Type: text/plain, Size: 1574 bytes --]

On 04/03/2011 09:38 AM, Richard Guenther wrote:
> On Sat, Apr 2, 2011 at 7:05 PM, Tom de Vries <vries@codesourcery.com> wrote:
>> On 04/02/2011 09:47 AM, Richard Guenther wrote:
>>> On Fri, Apr 1, 2011 at 6:06 PM, Tom de Vries <vries@codesourcery.com> wrote:
>>>> On 04/01/2011 05:18 PM, Richard Earnshaw wrote:
>>>>>
>>>>> On Fri, 2011-04-01 at 16:45 +0200, Tom de Vries wrote:
>>>>>> Reposting, with ChangeLog.
>>>>>
>>>>>  #define BRANCH_COST(speed_p, predictable_p) \
>>>>> -  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
>>>>> +  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
>>>>> +               : (optimize > 0 ? 2 : 0))
>>>>>
>>>>> Don't use optimize_size here, use !speed_p.
>>>>>
>>>>> Otherwise OK.
>>>>>
>>>>
>>>> Replaced optimize_size by !speed_p.
>>>
>>> I wonder if we can add a code-size test harness.  Using GNU size
>>> for example, if available, and a new dg-final { object-size SIZE } that
>>> fails when the size is greater than the specified one (of course all
>>> object-size tests with specific target restrictions).
>>
>> like this?
> 
> Yes!
> 
> I'm not sure finding the size binary is ok, 

Me neither. I just copied what I saw done for c++filt in
scan-assembler-dem-not, and found that it works for me.

> and maybe we need to
> verify that size output actually matches our expectation.  

Changes since previous post:
- split output of size into lines
- check format of first and second line
- replaced 'switch $what' with 'lsearch $what'

> Other than that it's exactly what I meant.
> 

Great.

> Mike?  Rainer?
> 

Thanks,
- Tom

[-- Attachment #2: 8_crossjump-fallthru-ml.test.patch --]
[-- Type: text/x-patch, Size: 4025 bytes --]

2011-04-01  Tom de Vries  <tom@codesourcery.com>

	PR target/43920
	* lib/scanasm.exp (object-size): New proc.
	* gcc.target/arm/pr43920-2.c: New test.

Index: gcc/testsuite/lib/scanasm.exp
===================================================================
--- gcc/testsuite/lib/scanasm.exp	(revision 170556)
+++ gcc/testsuite/lib/scanasm.exp	(working copy)
@@ -315,6 +315,92 @@ proc scan-assembler-dem-not { args } {
     }
 }
 
+# Call pass if object size is ok, otherwise fail.
+# example: /* { dg-final { object-size text <= 54 } } */
+proc object-size { args } {
+    global size
+    global base_dir
+
+    if { [llength $args] < 3 } {
+	error "object-size: too few arguments"
+        return
+    }
+    if { [llength $args] > 4 } {
+	error "object-size: too many arguments"
+	return
+    }
+    if { [llength $args] >= 4 } {
+	switch [dg-process-target [lindex $args 1]] {
+	    "S" { }
+	    "N" { return }
+	    "F" { setup_xfail "*-*-*" }
+	    "P" { }
+	}
+    }
+
+    # Find size like we find g++ in g++.exp.
+    if ![info exists size]  {
+	set size [findfile $base_dir/../../../binutils/size \
+		  $base_dir/../../../binutils/size \
+		  [findfile $base_dir/../../size $base_dir/../../size \
+		   [findfile $base_dir/size $base_dir/size \
+		    [transform size]]]]
+	verbose -log "size is $size"
+    }
+
+    upvar 2 name testcase
+    set testcase [lindex $testcase 0]
+    set output_file "[file rootname [file tail $testcase]].o"
+    set output [remote_exec host "$size" "$output_file"]
+    set status [lindex $output 0]
+    if { $status != 0 } {
+        error "object-size: $size failed"
+        return
+    }
+
+    set text [lindex $output 1]
+    set lines [split $text "\n"]
+
+    set line0 [lindex $lines 0]
+    if ![regexp {^\s*text\s+data\s+bss\s+dec\s+hex\s+filename\s*$} $line0] {
+        error "object-size: $size did not produce expected first line: $line0"
+        return
+    }
+
+    set line1 [lindex $lines 1]
+    if ![regexp {^\s*\d+\s+\d+\s+\d+\s+\d+\s+[\da-fA-F]+\s+} $line1] {
+        error "object-size: $size did not produce expected second line: $line1"
+        return
+    }
+
+    set what [lindex $args 0]
+    set where [lsearch { text data bss total } $what]
+    if { $where == -1 } {
+        error "object-size: illegal argument: $what"
+        return
+    }
+    set actual [lindex $line1 $where]
+    verbose -log "$what size is $actual"
+
+    set cmp [lindex $args 1]
+    if { [lsearch { < > <= >= == != } $cmp] == -1 } {
+        error "object-size: illegal argument: $cmp"
+        return
+    }
+
+    set with [lindex $args 2]
+    if { ![string is integer $with ] } {
+        error "object-size: illegal argument: $with"
+        return
+    }
+
+    if { [expr $actual $cmp $with] } {
+	pass "$testcase object-size $what $cmp $with"
+    } else {
+	fail "$testcase object-size $what $cmp $with"
+    }
+}
+
 # Utility for testing that a function is defined on the current line.
 # Call pass if so, otherwise fail.  Invoked directly; the file must
 # have been compiled with -g -dA.
Index: gcc/testsuite/gcc.target/arm/pr43920-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pr43920-2.c	(revision 0)
@@ -0,0 +1,30 @@
+/* { dg-do assemble } */
+/* { dg-options "-mthumb -Os -save-temps" }  */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+#include <stdio.h>
+
+int getFileStartAndLength (int fd, int *start_, size_t *length_)
+{
+      int start, end;
+      size_t length;
+
+      start = lseek (fd, 0L, SEEK_CUR);
+      end = lseek (fd, 0L, SEEK_END);
+
+      if (start == -1 || end == -1)
+         return -1;
+
+      length = end - start;
+      if (length == 0)
+         return -1;
+
+      *start_ = start;
+      *length_ = length;
+
+      return 0;
+}
+
+/* { dg-final { scan-assembler-times "pop" 2 } } */
+/* { dg-final { scan-assembler-times "beq" 3 } } */
+/* { dg-final { object-size text <= 54 } } */

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-03  7:38             ` Richard Guenther
  2011-04-03 17:03               ` Tom de Vries
@ 2011-04-03 18:40               ` Mike Stump
  1 sibling, 0 replies; 64+ messages in thread
From: Mike Stump @ 2011-04-03 18:40 UTC (permalink / raw)
  To: Richard Guenther
  Cc: Tom de Vries, Richard Earnshaw, gcc-patches, Mike Stump, Rainer Orth

On Apr 3, 2011, at 12:38 AM, Richard Guenther wrote:
> On Sat, Apr 2, 2011 at 7:05 PM, Tom de Vries <vries@codesourcery.com> wrote:
>> 
>>> I wonder if we can add a code-size test harness.  Using GNU size
>>> for example, if available, and a new dg-final { object-size SIZE } that
>>> fails when the size is greater than the specified one (of course all
>>> object-size tests with specific target restrictions).
>> 
>> like this?
> 
> Yes!

Ok.  This looks fine and clean, with a reasonable interface.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
       [not found]                 ` <BANLkTikEruAfGJ392FXtasLv6-yV2tYSRQ@mail.gmail.com>
@ 2011-04-04 12:14                   ` Richard Guenther
  2011-04-04 12:23                     ` Rainer Orth
  0 siblings, 1 reply; 64+ messages in thread
From: Richard Guenther @ 2011-04-04 12:14 UTC (permalink / raw)
  To: Sergey Ostanevich
  Cc: Tom de Vries, Richard Earnshaw, gcc-patches, Mike Stump, Rainer Orth

On Sun, Apr 3, 2011 at 9:34 PM, Sergey Ostanevich <sergos.gnu@gmail.com> wrote:
> I would recommend using 'nm -S a.out', which gives
>
> [...]
> 00000000004004a4 0000000000000054 T main
> [...]
>
> then you can provide a name for the routine you want to test for the size.

That also sounds reasonable.  Is nm -S more portable than size?

Richard.

> Regards,
> Sergos
>
>
>
> 2011/4/3 Tom de Vries <vries@codesourcery.com>
>>
>> On 04/03/2011 09:38 AM, Richard Guenther wrote:
>> > On Sat, Apr 2, 2011 at 7:05 PM, Tom de Vries <vries@codesourcery.com>
>> > wrote:
>> >> On 04/02/2011 09:47 AM, Richard Guenther wrote:
>> >>> On Fri, Apr 1, 2011 at 6:06 PM, Tom de Vries <vries@codesourcery.com>
>> >>> wrote:
>> >>>> On 04/01/2011 05:18 PM, Richard Earnshaw wrote:
>> >>>>>
>> >>>>> On Fri, 2011-04-01 at 16:45 +0200, Tom de Vries wrote:
>> >>>>>> Reposting, with ChangeLog.
>> >>>>>
>> >>>>>  #define BRANCH_COST(speed_p, predictable_p) \
>> >>>>> -  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
>> >>>>> +  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
>> >>>>> +               : (optimize > 0 ? 2 : 0))
>> >>>>>
>> >>>>> Don't use optimize_size here, use !speed_p.
>> >>>>>
>> >>>>> Otherwise OK.
>> >>>>>
>> >>>>
>> >>>> Replaced optimize_size by !speed_p.
>> >>>
>> >>> I wonder if we can add a code-size test harness.  Using GNU size
>> >>> for example, if available, and a new dg-final { object-size SIZE } that
>> >>> fails when the size is greater than the specified one (of course all
>> >>> object-size tests with specific target restrictions).
>> >>
>> >> like this?
>> >
>> > Yes!
>> >
>> > I'm not sure finding the size binary is ok,
>>
>> Me neither. I just copied what I saw done for c++filt in
>> scan-assembler-dem-not, and found that it works for me.
>>
>> > and maybe we need to
>> > verify that size output actually matches our expectation.
>>
>> Changes since previous post:
>> - split output of size into lines
>> - check format of first and second line
>> - replaced 'switch $what' with 'lsearch $what'
>>
>> > Other than that it's exactly what I meant.
>> >
>>
>> Great.
>>
>> > Mike?  Rainer?
>> >
>>
>> Thanks,
>> - Tom
>
>

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-04 12:14                   ` Richard Guenther
@ 2011-04-04 12:23                     ` Rainer Orth
  2011-04-05 13:35                       ` Tom de Vries
  0 siblings, 1 reply; 64+ messages in thread
From: Rainer Orth @ 2011-04-04 12:23 UTC (permalink / raw)
  To: Richard Guenther
  Cc: Sergey Ostanevich, Tom de Vries, Richard Earnshaw, gcc-patches,
	Mike Stump

Richard Guenther <richard.guenther@gmail.com> writes:

> On Sun, Apr 3, 2011 at 9:34 PM, Sergey Ostanevich <sergos.gnu@gmail.com> wrote:
>> I would recommend using 'nm -S a.out', which gives
>>
>> [...]
>> 00000000004004a4 0000000000000054 T main
>> [...]
>>
>> then you can provide a name for the routine you want to test for the size.
>
> That also sounds reasonable.  Is nm -S more portable than size?

Neither Solaris nor IRIX nm have it.  size isn't particularly portable,
either: there are many variations in output format.

	Rainer

-- 
-----------------------------------------------------------------------------
Rainer Orth, Center for Biotechnology, Bielefeld University

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes.
  2011-04-01 14:54         ` Tom de Vries
@ 2011-04-04 16:14           ` Tom de Vries
  2011-04-06 17:41           ` Jeff Law
  1 sibling, 0 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-04 16:14 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches, ebotcazou

Hi Jeff,

On 04/01/2011 04:54 PM, Tom de Vries wrote:
> On 03/31/2011 11:16 PM, Tom de Vries wrote:
>> On 03/31/2011 08:52 PM, Jeff Law wrote:
>>
>>> On 03/31/11 12:42, Tom de Vries wrote:
>>>> Uses regnotes to analyze whether we can replace insn a by insn b, even
>>>> if we cannot replace insn b by insn a. Uses this info in crossjumping.
>>
>>> Shouldn't this be using single_set rather than digging through PATTERN,
>>> then verifying both are SETs, etc.?
>>>
>>> Otherwise don't you miss most of the benefit on architectures where most
>>> insns clobber the flags register in a PARALLEL with the SET?
>>
>> I see what you mean about missing these insns currently.
>>
>> I guess I will have to check that the non-SET part of the PARALLEL is
>> identical between the 2 insns.
>>
>> I'll update the patch to handle this case.
> 
> changes compared to previous posting:
> - add ChangeLog.
> - use single_set
> - add equal_different_set_p and use it in can_replace_by
> 
> Retested on x86_64.
> 

Do these changes (
http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00038.html ) address your
concerns? Is the patch OK for trunk now?

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 3/9] Cleanup.
  2011-04-01 15:15     ` Tom de Vries
@ 2011-04-04 18:26       ` Jeff Law
  0 siblings, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-04-04 18:26 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou, Maxim Kuvyrkov, Bernd Schmidt

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 04/01/11 09:15, Tom de Vries wrote:
> On 04/01/2011 05:01 PM, Jakub Jelinek wrote:
>> On Fri, Apr 01, 2011 at 04:56:10PM +0200, Tom de Vries wrote:
>>> Reposting, with ChangeLog.
>>
>> Similarly one of your patches had
>> 	* gcc/cfgcleanup.c (whatever): Whatever.
>> in the ChangeLog, should be just
>> 	* cfgcleanup.c (whatever): Whatever.
>>
>> 	Jakub
> 
> Fixed ChangeLog.
OK.  Though I don't think you need to mention 43920 in this specific
hunk.  It's a mere cleanup and won't affect the PR at all.

Anyway, please go ahead and install this patch.

Thanks,
jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNmg0pAAoJEBRtltQi2kC7ex8H/3M0G8uXjO7kVhQpuF9RN4sE
4MZ/+0+pYH+mQ8UPzBp7+/YexyDcF5pJErZS3kp9J45JZhm+iWT5sUT5JDs8XeXb
Ye4tBvyCItPGy1M6/Vi7ycSm84dCLK1SKsZcA9Nggjy/C6BIuHNKB0B3Lw+ljOtc
gOXlIJZZH7DT1pIOR2j1Lh7FcuuESxE718kvW+Jakp6vTB454ZJz1yzcvoSC66bK
Y74zqjO0JyZVt+iNx/pfsqPvhhmKVp0M6+J6vLZKQx/4VIQOTKC4fi0bSyJK94ec
4JG3idbM0K6lhr/xNxq4IWNutwDsiitYEZw8fb4wA+l8yoTGxuyR5fFa1qQIjNw=
=iXl3
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber.
  2011-04-01 14:48     ` Tom de Vries
@ 2011-04-04 19:11       ` Jeff Law
  0 siblings, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-04-04 19:11 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 04/01/11 08:48, Tom de Vries wrote:
> Reposting, with ChangeLog.

> 	PR target/43920
> 	* cfgcleanup.c (flow_find_cross_jump): Don't count USE or CLOBBER as
> 	insn.

OK.

jeff

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNmhe5AAoJEBRtltQi2kC7shsH/jCMbUcv2O3f3Z+kXMq8az/R
qNJfeeNPn/6In+FDx7168ii0LT5thG5/D6ZFpfrmv2FB4QARF2+5E6XYuXEMLxoJ
PU6SND7Yc2VsVf2eU1AKxtgNVCEsgJWLafVd8GfSO6OT8wjQ/P9vw9NOt0NXBRtU
i5KsPmnfULm9H3HHzKlbgBU1IkpZJIlP9Bj4U0ttDmtCB/72BvTJ7kedoR4Lp1VM
JV8IkgqlGL+ptyN+IVgDX9fQ92TCiSjfiAzAX73LWzjr8VH+HAejPzEIS9bO7Ndr
uNtS2edRJJqNyFbIUApNAab4/eaJpeas8tO3ZALQaCibCnyvMJA5z+uZy7sWF7U=
=kMtv
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register.
  2011-04-01 14:49     ` Tom de Vries
@ 2011-04-04 19:11       ` Jeff Law
  0 siblings, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-04-04 19:11 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 04/01/11 08:49, Tom de Vries wrote:
> Reposting, with ChangeLog.
> 	PR target/43920
> 	* function.c (emit_use_return_register_into_block): New function.
> 	(thread_prologue_and_epilogue_insns): Use
> 	emit_use_return_register_into_block.
OK
Jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNmhfwAAoJEBRtltQi2kC7TZUH/0c8CygMEMP7Nr7D3bceYgR0
u9P35+/b4CXCjTCOtvS+tR6c+2Wuc3rRmjFEQq9Q3z7K5DySfbkCVfGCeJ0kGj5T
nMg77l9zINumT+zY460Taqm3EuL7Souh89ahw2D7fyPXh2vaFugWLckoCuzkR/6K
PTRXD/rnaaQNAaU19h5yRtVQBgDdPfFY0KruWMpCyTWYxdA+uvRdfi1BFoV43UdZ
Kis3SANV3mZbJV9fKXcfwolpSZxE6UWcURgd00Z3L1oaGbkyAvtgBhgOKfZ3WBUB
++0aHQztbJSeClD66ML1r6pZzgg4urowDohJme6yGPpt0hJjKEf75PCG0lyBQ/A=
=7zN6
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-04-01 16:10       ` Tom de Vries
@ 2011-04-05 10:43         ` Tom de Vries
  2011-04-05 12:20           ` Richard Earnshaw
  2011-04-06 10:48           ` Ramana Radhakrishnan
  0 siblings, 2 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-05 10:43 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: gcc-patches

Hi Richard,

On 04/01/2011 06:10 PM, Tom de Vries wrote:
> On 04/01/2011 05:34 PM, Richard Earnshaw wrote:
>> On Fri, 2011-04-01 at 16:47 +0200, Tom de Vries wrote:
>>> Reposting, with ChangeLog.
>>
>> +/* { dg-options "-march=armv7-a -mthumb -Os" } */
>>
>> No, use dg-require-effective-target.  The above doesn't work properly
>> with multilib testing.
>>
> 
> Changed it into:
> 
> +/* { dg-options "-mthumb -Os" }  */
> +/* { dg-require-effective-target arm_thumb2_ok } */
> 

Is this updated version (
http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00058.html ) ok for trunk?

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope.
  2011-03-31 18:58     ` Jeff Law
@ 2011-04-05 11:44       ` Tom de Vries
  0 siblings, 0 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-05 11:44 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches, ebotcazou

Hi Jeff,

On 03/31/2011 08:56 PM, Jeff Law wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> On 03/31/11 12:43, Tom de Vries wrote:
>> Allows crossjump over fallthru paths.
> OK.
> jeff

You ok'ed patches 7/9 (
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg02258.html ) and 9/9 (
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg02260.html ).

Unfortunately I did not include the ChangeLog entries at that time. I
resubmitted these patches with ChangeLog:
- http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00039.html
- http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00041.html

Are the ChangeLog entries ok as well?

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-04-05 10:43         ` Tom de Vries
@ 2011-04-05 12:20           ` Richard Earnshaw
  2011-04-06 10:48           ` Ramana Radhakrishnan
  1 sibling, 0 replies; 64+ messages in thread
From: Richard Earnshaw @ 2011-04-05 12:20 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches


On Tue, 2011-04-05 at 12:42 +0200, Tom de Vries wrote:
> Hi Richard,
> 
> On 04/01/2011 06:10 PM, Tom de Vries wrote:
> > On 04/01/2011 05:34 PM, Richard Earnshaw wrote:
> >> On Fri, 2011-04-01 at 16:47 +0200, Tom de Vries wrote:
> >>> Reposting, with ChangeLog.
> >>
> >> +/* { dg-options "-march=armv7-a -mthumb -Os" } */
> >>
> >> No, use dg-require-effective-target.  The above doesn't work properly
> >> with multilib testing.
> >>
> > 
> > Changed it into:
> > 
> > +/* { dg-options "-mthumb -Os" }  */
> > +/* { dg-require-effective-target arm_thumb2_ok } */
> > 
> 
> Is this updated version (
> http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00058.html ) ok for trunk?
> 

Yes, thanks.

R.


^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-04 12:23                     ` Rainer Orth
@ 2011-04-05 13:35                       ` Tom de Vries
  2011-05-03 18:19                         ` Rainer Orth
  0 siblings, 1 reply; 64+ messages in thread
From: Tom de Vries @ 2011-04-05 13:35 UTC (permalink / raw)
  To: Rainer Orth
  Cc: Richard Guenther, Sergey Ostanevich, Richard Earnshaw,
	gcc-patches, Mike Stump

[-- Attachment #1: Type: text/plain, Size: 681 bytes --]

On 04/04/2011 02:22 PM, Rainer Orth wrote:
> Richard Guenther <richard.guenther@gmail.com> writes:
> 
>> On Sun, Apr 3, 2011 at 9:34 PM, Sergey Ostanevich <sergos.gnu@gmail.com> wrote:
>>> I would recommend to use 'nm -S a.out' that gives
>>>
>>> [...]
>>> 00000000004004a4 0000000000000054 T main
>>> [...]
>>>
>>> then you can provide a name for the routine you want to test for the size.
>>
>> That also sounds reasonable.  Is nm -S more portable than size?
> 
> Neither Solaris nor IRIX nm have it.  size isn't particularly portable,
> either: there are many variations in output format.
> 
> 	Rainer
> 

In case we ever need it, here's a patch to access nm -S.

Thanks,
- Tom

[-- Attachment #2: dg-final-object-symbol-size.log --]
[-- Type: text/x-log, Size: 101 bytes --]

2011-04-05  Tom de Vries  <tom@codesourcery.com>

	* lib/scanasm.exp (object-symbol-size): New proc.

[-- Attachment #3: dg-final-object-symbol-size.patch --]
[-- Type: text/x-patch, Size: 2888 bytes --]

Index: gcc/testsuite/lib/scanasm.exp
===================================================================
--- gcc/testsuite/lib/scanasm.exp (revision 170556)
+++ gcc/testsuite/lib/scanasm.exp (working copy)
@@ -315,6 +315,90 @@ proc scan-assembler-dem-not { args } {
     }
 }
 
+# Call pass if the size "nm -S" reports for SYMBOL in the test's object
+# file satisfies OP VALUE, otherwise fail.  Args: SYMBOL OP VALUE [selector].
+# example: /* { dg-final { object-symbol-size main <= 54 } } */
+proc object-symbol-size { args } {
+    global nm base_dir
+
+    if { [llength $args] < 3 } {
+	error "object-symbol-size: too few arguments"
+	return
+    }
+    if { [llength $args] > 4 } {
+	error "object-symbol-size: too many arguments"
+	return
+    }
+    # The optional target selector is the fourth argument, after VALUE.
+    if { [llength $args] >= 4 } {
+	switch [dg-process-target [lindex $args 3]] {
+	    "S" { }
+	    "N" { return }
+	    "F" { setup_xfail "*-*-*" }
+	    "P" { }
+	}
+    }
+
+    # Find nm like we find g++ in g++.exp.
+    if ![info exists nm]  {
+	set nm [findfile $base_dir/../../../binutils/nm \
+		$base_dir/../../../binutils/nm \
+		[findfile $base_dir/../../nm $base_dir/../../nm \
+		 [findfile $base_dir/nm $base_dir/nm \
+		  [transform nm]]]]
+	verbose -log "nm is $nm"
+    }
+
+    upvar 2 name testcase
+    set testcase [lindex $testcase 0]
+    set output_file "[file rootname [file tail $testcase]].o"
+    set output [remote_exec host "$nm" "-S $output_file"]
+    if { [lindex $output 0] != 0 } {
+	error "object-symbol-size: $nm failed"
+	return
+    }
+    set text [lindex $output 1]
+    # nm -S prints "value size type name"; size and type precede the name.
+    set symbol [lindex $args 0]
+    set match [lsearch -all $text $symbol]
+    if { [llength $match] != 1 } {
+	error "object-symbol-size: number of matches for $symbol: [llength $match]"
+	return
+    }
+
+    set type [lindex $text [expr $match - 1]]
+    if ![regexp {^[a-zA-Z\-\?]$} $type] {
+	error "object-symbol-size: type field for $symbol not as expected: $type"
+	return
+    }
+    # Check that the size field is hex before handing it to expr.
+    set hex [lindex $text [expr $match - 2]]
+    if ![regexp {^[0-9a-fA-F]+$} $hex] {
+	error "object-symbol-size: size field for $symbol not as expected: $hex"
+	return
+    }
+    set actual [expr "0x$hex"]
+    verbose -log "$symbol size is $actual"
+
+    set cmp [lindex $args 1]
+    if { [lsearch { < > <= >= == != } $cmp] == -1 } {
+	error "object-symbol-size: illegal argument: $cmp"
+	return
+    }
+
+    set with [lindex $args 2]
+    if ![string is integer $with ] {
+	error "object-symbol-size: illegal argument: $with"
+	return
+    }
+
+    if [expr $actual $cmp $with] {
+	pass "$testcase object-symbol-size $symbol $cmp $with"
+    } else {
+	fail "$testcase object-symbol-size $symbol $cmp $with"
+    }
+}
+
 # Utility for testing that a function is defined on the current line.
 # Call pass if so, otherwise fail.  Invoked directly; the file must
 # have been compiled with -g -dA.

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope.
  2011-04-01 14:55     ` Tom de Vries
@ 2011-04-05 21:45       ` Jeff Law
  0 siblings, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-04-05 21:45 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 04/01/11 08:55, Tom de Vries wrote:
> Reposting, with ChangeLog.
> 
> 	PR target/43920
> 	* cfgcleanup.c (walk_to_nondebug_insn): New function.
> 	(flow_find_cross_jump): Use walk_to_nondebug_insn.  Recalculate bb1 and
> 	bb2.
> 	(try_crossjump_to_edge): Handle case that newpos1 or newpos2 is not src1
> 	or src2.  Redirect edges to the last basic block.  Update frequency and
> 	count on multiple basic blocks in case of fallthru.

OK.

Jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNm41pAAoJEBRtltQi2kC7QWIIAIIc1cyuZpppSnle4bng3FEC
ppBc5MEppp5C0UxUuYdLZBw8RHD1atdQqzVNup0fRIyF0U46EQzt5K1SEPKgMQ7M
HeQw4rWkzeMawHWJUbgxmLT7VpuXVJQvVRPRBMGar+yT8I0x+9PS1FPwSFPsa8qZ
Dal/zcO49BZIkYlg12AwEClUTZcjAfRev8ZXHLlHgEHojhymMAyk/I3UKyXUVveq
9dKFl0XX4RgFTHWbWBU2oZ282w7b9Lf8I5QFcCK+g5nGHbgjtdnQDivgrHPq8ewr
26PkU2718GpCI2yYhGvH6nJ8WVVnBVbH03VHuiic5mUosOOc/Uiz6FpB7/L47Mc=
=avCw
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case.
  2011-04-01 15:18         ` Tom de Vries
  2011-04-01 16:14           ` Tom de Vries
@ 2011-04-05 21:46           ` Jeff Law
  1 sibling, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-04-05 21:46 UTC (permalink / raw)
  To: Tom de Vries; +Cc: Jakub Jelinek, gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 04/01/11 09:18, Tom de Vries wrote:
> On 04/01/2011 05:01 PM, Jakub Jelinek wrote:
>> On Fri, Apr 01, 2011 at 04:56:10PM +0200, Tom de Vries wrote:
>>> Reposting, with ChangeLog.
>>
>>> 2011-04-01  Tom de Vries  <tom@codesourcery.com>
>>>
>>> 	PR target/43920
>>> 	* testsuite/gcc.target/arm/pr43920-2.c: New test.
>>
>> gcc/testsuite/ has its own ChangeLog, so the ChangeLog entry
>> should say just
>> 	* gcc.target/arm/pr43920-2.c: New test.
>>
>> 	Jakub
> 
> Fixed ChangeLog.
> 
> Thanks,
> - Tom
> 	PR target/43920
> 	* gcc.target/arm/pr43920-2.c: New test.
OK.

jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNm42LAAoJEBRtltQi2kC7TGoIAIiabS2pihIf1B6DACMOwQJ5
RRdM5bsWDlEJO6K2u54IMrt4Fmp9j2QT2DpLpU1Sv+QNdpuwfVvTLkwVs2Q0XZTK
YeKjTWfVAuKphr+aeb8PA5/7sfOaGXyWf9WC3N3kKn9E9I1mvVoT0+GXhLgejt0G
PeU/zYRri8JEZcrfOpvAzOHl1NQKXDqt3JKb2ReEvmMFCFiiM56JYks+jtxHmzR5
J4HFsa31kPKej3ljn9PDY9QfMsxJJtfEX2gEF58HVsRPQJBuCIOimrQ5vso+G66D
VXFtsS1nXcMo8aYQ6rM8+TSp2mY+tAPYTcTh0VSHdTlPoEj1tbCKN4Xq97a1oRk=
=5aQb
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions.
  2011-04-01 14:56     ` Tom de Vries
@ 2011-04-05 21:46       ` Jeff Law
  0 siblings, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-04-05 21:46 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 04/01/11 08:56, Tom de Vries wrote:
> Reposting, with ChangeLog.
> 	PR target/43920
> 	* cfgcleanup.c (try_crossjump_to_edge): Add dir parameter.  Pass dir to
> 	flow_find_cross_jump.  Swap variables to implement backward replacement.
> 	(try_crossjump_bb): Add argument to try_crossjump_to_edge.
OK.

jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNm42uAAoJEBRtltQi2kC7h0AH/AoaZ4aIRIfBwPIWJEra66s2
jnwqPp0QuyY70L3+Ik3/5dJIvnfR0lorJ7AzIyUz2hICRLqKP9grYNFIgUe5HpZH
BwCgS4v46Gj2tbwDCZ6ggoNi8PhVIX4QAh/HoSpoIfOGssVnzHUxP0tKlkCzMmQL
V1+RvMnrxSL2ljYT0FRoo49osJ/OXYrrJUPSC8ELm5DbtA7EydHFKtg0W9dtovcC
1LBUWaBPz/rNqaI34dL7RjXcfgYaaOj5s6SPN1DuGv2cGuTjbvMbupa9rWAYHjwA
qW79rqFbVdHyUo55W5ofUniadWs/1Byz2DpsegyxOjx35UBrcH42SRYR2XMFIzI=
=Cd8G
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-04-05 10:43         ` Tom de Vries
  2011-04-05 12:20           ` Richard Earnshaw
@ 2011-04-06 10:48           ` Ramana Radhakrishnan
  2011-04-06 12:29             ` Tom de Vries
  1 sibling, 1 reply; 64+ messages in thread
From: Ramana Radhakrishnan @ 2011-04-06 10:48 UTC (permalink / raw)
  To: Tom de Vries; +Cc: Richard Earnshaw, gcc-patches


> Is this updated version (
> http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00058.html ) ok for trunk?

I think you accidentally applied the patch twice and committed it. Yeah 
unfortunately there is no equivalent of git clean -f -d in svn unless 
you write something up with svn status and some shell scripting foo...

I've now fixed this up with 
http://gcc.gnu.org/ml/gcc-cvs/2011-04/msg00224.html

Ramana

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 2/9] ARM specific part - test case
  2011-04-06 10:48           ` Ramana Radhakrishnan
@ 2011-04-06 12:29             ` Tom de Vries
  0 siblings, 0 replies; 64+ messages in thread
From: Tom de Vries @ 2011-04-06 12:29 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: Richard Earnshaw, gcc-patches

On 04/06/2011 12:48 PM, Ramana Radhakrishnan wrote:
> 
>> Is this updated version (
>> http://gcc.gnu.org/ml/gcc-patches/2011-04/msg00058.html ) ok for trunk?
> 
> I think you accidentally applied the patch twice and committed it. 

That is exactly what happened, according to my history:
...
 1877  patch -p0 < ../patches/arm-size-branch_cost.test.patch
 1878  patch -p0 < ../patches/arm-size-branch_cost.test.patch
 1879  svn add gcc/testsuite/gcc.target/arm/pr43920-1.c
...

> Yeah
> unfortunately there is no equivalent of git clean -f -d in svn unless 
> you write something up with svn status and some shell scripting foo...
> 
> I've now fixed this up with 
> http://gcc.gnu.org/ml/gcc-cvs/2011-04/msg00224.html
> 

Sorry about that, and thanks a lot!

Thanks,
- Tom

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes.
  2011-04-01 14:54         ` Tom de Vries
  2011-04-04 16:14           ` Tom de Vries
@ 2011-04-06 17:41           ` Jeff Law
  1 sibling, 0 replies; 64+ messages in thread
From: Jeff Law @ 2011-04-06 17:41 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gcc-patches, ebotcazou

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 04/01/11 08:54, Tom de Vries wrote:
> On 03/31/2011 11:16 PM, Tom de Vries wrote:
>> On 03/31/2011 08:52 PM, Jeff Law wrote:
>>
>>> On 03/31/11 12:42, Tom de Vries wrote:
>>>> Uses regnotes to analyze whether we can replace insn a by insn b, even
>>>> if we cannot replace insn b by insn a. Uses this info in crossjumping.
>>
>>> Shouldn't this be using single_set rather than digging through PATTERN,
>>> then verifying both are SETs, etc.?
>>>
>>> Otherwise don't you miss most of the benefit on architectures where most
>>> insns clobber the flags register in a PARALLEL with the SET?
>>
>> I see what you mean about missing these insns currently.
>>
>> I guess I will have to check that the non-SET part of the PARALLEL is
>> identical between the 2 insns.
>>
>> I'll update the patch to handle this case.
> 
> changes compared to previous posting:
> - add ChangeLog.
> - use single_set
> - add equal_different_set_p and use it in can_replace_by
> 
> Retested on x86_64.


> 	PR target/43920
> 	* cfgcleanup.c (equal_different_set_p, can_replace_by, merge_dir): New
> 	function.
> 	(old_insns_match_p): Change return type.  Replace return false/true with
> 	return dir_none/dir_both.  Use can_replace_by.
> 	(flow_find_cross_jump): Add dir_p parameter.  Init replacement direction
> 	from dir_p.  Register replacement direction in dir, last_dir and
> 	afterlast_dir.	Handle new return type of old_insns_match_p using
> 	merge_dir.  Return replacement direction in dir_p.
> 	(flow_find_head_matching_sequence, outgoing_edges_match): Handle new
> 	return type of old_insns_match_p.
> 	(try_crossjump_to_edge): Add argument to call to flow_find_cross_jump.
> 	* ifcvt.c ( cond_exec_process_if_block): Add argument to call to
> 	flow_find_cross_jump.
> 	* basic-block.h (enum replace_direction): New type.
> 	(flow_find_cross_jump): Add parameter to declaration.
OK

Jeff
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJNnKW/AAoJEBRtltQi2kC7TC8IAJdB1hgkPmmC787EUBBycPCF
/ROYeWMZ62WyVqOD+eTVFXvv6v4s0XjPHQgS+zANBQPdvA3L8V2ugFYy66SWmQZj
1NSplCrBMRhS9Fu9M8uEWjvuVEUhqxOYLnPKXqeW/gD8UEHt2+gMLAGGFI4pxQRS
L+caqVMGvNvVZqMNAUTU7FLQfT1Zo50sBvbvm9w/GfjSVNC/dmkHRqf4Ta0oIDW/
Zm5oyX4FWzun8NbW+scaQlsxAiEA5xoxzXyGlLnj9UGCTiEeaYIsgg+SyYO8CeO0
o3FpsRfe+jMSK170cd7+mufPktjmCuAdiWQa2M7W6R04AOvdOV7DxNglHMHXljg=
=NrNE
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 64+ messages in thread

* Re: [PATCH, PR43920, 1/9] ARM specific part.
  2011-04-05 13:35                       ` Tom de Vries
@ 2011-05-03 18:19                         ` Rainer Orth
  0 siblings, 0 replies; 64+ messages in thread
From: Rainer Orth @ 2011-05-03 18:19 UTC (permalink / raw)
  To: Tom de Vries
  Cc: Richard Guenther, Sergey Ostanevich, Richard Earnshaw,
	gcc-patches, Mike Stump

Tom,

>>> That also sounds reasonable.  Is nm -S more portable than size?
>> 
>> Neither Solaris nor IRIX nm have it.  size isn't particularly portable,
>> either: there are many variations in output format.
[...]
> In case we ever need it, here's a patch to access nm -S.

sorry for the very late response, but a couple of comments.

> +# Call pass if symbol size is ok, otherwise fail.
> +# example: /* { dg-final { object-symbol-size main <= 54 } } */
> +proc object-symbol-size { args } {

This needs to be documented in doc/sourcebuild.texi (Final Actions).  We
made a concerted effort to document the testsuite and should keep it
that way.

> +    # Find nm like we find g++ in g++.exp.
> +    if ![info exists nm]  {
> +	set nm [findfile $base_dir/../../../binutils/nm \
> +		$base_dir/../../../binutils/nm \
> +		[findfile $base_dir/../../nm $base_dir/../../nm \
> +		 [findfile $base_dir/nm $base_dir/nm \
> +		  [transform nm]]]]
> +	verbose -log "nm is $nm"
> +    }

Please move this to a separate proc like gnat.exp (find_gnatclean).

> +    upvar 2 name testcase
> +    set testcase [lindex $testcase 0]
> +    set output_file "[file rootname [file tail $testcase]].o"
> +    set output [remote_exec host "$nm" "-S $output_file"]
> +    set status [lindex $output 0]
> +    if { $status != 0 } {
> +        error "object-symbol-size: $nm failed"
> +        return
> +    }

As I said, nm -S is completely unportable.  I'd rather at least check
whether we can cope with the differences in size output format instead,
or use a hook here that allows different implementations to be used.

Thanks for doing this.

	Rainer

-- 
-----------------------------------------------------------------------------
Rainer Orth, Center for Biotechnology, Bielefeld University

^ permalink raw reply	[flat|nested] 64+ messages in thread

end of thread, other threads:[~2011-05-03 18:18 UTC | newest]

Thread overview: 64+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-31 18:27 [PATCH, PR43920] Improve code-size optimizations Tom de Vries
2011-03-31 18:29 ` [PATCH, PR43920, 1/9] ARM specific part Tom de Vries
2011-04-01 14:46   ` Tom de Vries
2011-04-01 15:19     ` Richard Earnshaw
2011-04-01 16:06       ` Tom de Vries
2011-04-02  7:47         ` Richard Guenther
2011-04-02 17:06           ` Tom de Vries
2011-04-03  7:38             ` Richard Guenther
2011-04-03 17:03               ` Tom de Vries
     [not found]                 ` <BANLkTikEruAfGJ392FXtasLv6-yV2tYSRQ@mail.gmail.com>
2011-04-04 12:14                   ` Richard Guenther
2011-04-04 12:23                     ` Rainer Orth
2011-04-05 13:35                       ` Tom de Vries
2011-05-03 18:19                         ` Rainer Orth
2011-04-03 18:40               ` Mike Stump
2011-03-31 18:31 ` [PATCH, PR43920, 2/9] ARM specific part - test case Tom de Vries
2011-04-01 14:47   ` Tom de Vries
2011-04-01 15:17     ` Tom de Vries
2011-04-01 15:34     ` Richard Earnshaw
2011-04-01 16:10       ` Tom de Vries
2011-04-05 10:43         ` Tom de Vries
2011-04-05 12:20           ` Richard Earnshaw
2011-04-06 10:48           ` Ramana Radhakrishnan
2011-04-06 12:29             ` Tom de Vries
2011-03-31 18:35 ` [PATCH, PR43920, 3/9] Cleanup Tom de Vries
2011-03-31 18:43   ` Jeff Law
2011-04-01 14:48   ` Tom de Vries
2011-04-01 15:15     ` Tom de Vries
2011-04-04 18:26       ` Jeff Law
2011-03-31 18:35 ` [PATCH, PR43920, 4-9/9] Cross-jumping Tom de Vries
2011-03-31 18:36   ` [PATCH, PR43920, 4/9] Cross-jumping - Don't count use or clobber Tom de Vries
2011-03-31 18:40     ` Jeff Law
2011-03-31 19:09       ` Tom de Vries
2011-03-31 19:19         ` Jeff Law
2011-04-01 14:48     ` Tom de Vries
2011-04-04 19:11       ` Jeff Law
2011-03-31 18:42   ` [PATCH, PR43920, 5/9] Cross-jumping - Add missing use of return register Tom de Vries
2011-03-31 18:52     ` Jeff Law
2011-04-01 14:49     ` Tom de Vries
2011-04-04 19:11       ` Jeff Law
2011-03-31 18:44   ` [PATCH, PR43920, 6/9] Cross-jumping - Use reg-notes Tom de Vries
2011-03-31 18:56     ` Jeff Law
2011-03-31 21:25       ` Tom de Vries
2011-04-01 14:54         ` Tom de Vries
2011-04-04 16:14           ` Tom de Vries
2011-04-06 17:41           ` Jeff Law
2011-03-31 18:45   ` [PATCH, PR43920, 7/9] Cross-jumping - Extend search scope Tom de Vries
2011-03-31 18:58     ` Jeff Law
2011-04-05 11:44       ` Tom de Vries
2011-04-01 14:55     ` Tom de Vries
2011-04-05 21:45       ` Jeff Law
2011-03-31 18:46   ` [PATCH, PR43920, 8/9] Cross-jumping - Extend search scope - test case Tom de Vries
2011-03-31 19:00     ` Jeff Law
2011-04-01 14:56     ` Tom de Vries
2011-04-01 15:01       ` Jakub Jelinek
2011-04-01 15:18         ` Tom de Vries
2011-04-01 16:14           ` Tom de Vries
2011-04-05 21:46           ` Jeff Law
2011-03-31 18:56   ` [PATCH, PR43920, 9/9] Cross-jumping - Allow both directions Tom de Vries
2011-03-31 19:02     ` Jeff Law
2011-04-01 14:56     ` Tom de Vries
2011-04-05 21:46       ` Jeff Law
2011-03-31 21:16 ` [PATCH, PR43920] Improve code-size optimizations Eric Botcazou
2011-04-01 15:06   ` Tom de Vries
2011-04-01 16:06     ` Eric Botcazou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).