public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch,AVR]: PR50447: Tweak addhi3
@ 2011-10-18 12:22 Georg-Johann Lay
  2011-10-18 12:39 ` Denis Chertykov
  0 siblings, 1 reply; 11+ messages in thread
From: Georg-Johann Lay @ 2011-10-18 12:22 UTC (permalink / raw)
  To: gcc-patches; +Cc: Denis Chertykov, Eric Weddington

[-- Attachment #1: Type: text/plain, Size: 1158 bytes --]

This patch do some tweaks to addhi3 like adding QI scratch register.

The original *addhi3 insn is still there and located prior to new
addhi3_clobber insn because addhi3 is special to reload (thanks Danis for this
note) so that there is a version with and a version without scratch register.

Patch passes without regressions.

Ok for trunk?

	PR target/50447
	* config/avr/avr.md (cc): New alternative out_plus_noclobber.
	(adjust_len): Ditto.
	(addhi3): Don't pipe through short; use gen_int_mode instead.
	Prior to reload, expand to gen_addhi3_clobber.
	(*addhi3): Use avr_out_plus_noclobber if applicable, use
	out_plus_noclobber in cc and adjust_len attribute.
	(addhi3_clobber): 2 new RTL peepholes.
	(addhi3_clobber): New insn.
	* config/avr/avr-protos.h: (avr_out_plus_noclobber): New prototype.
	* config/avr/avr.c (avr_out_plus_noclobber): New function.
	(notice_update_cc): Handle CC_OUT_PLUS_NOCLOBBER.
	(avr_out_plus_1): Tweak if only MSB is +/-1 and other bytes are 0.
	Set cc0 to set_zn for adiw on 16-bit values.
	(adjust_insn_length): Handle ADJUST_LEN_OUT_PLUS_NOCLOBBER.
	(expand_epilogue): No need to add 0 to frame_pointer_rtx.

Johann

[-- Attachment #2: pr50447-addhi3-v2.diff --]
[-- Type: text/x-patch, Size: 8996 bytes --]

Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 180104)
+++ config/avr/avr.md	(working copy)
@@ -78,7 +78,7 @@ (define_c_enum "unspecv"
   
 ;; Condition code settings.
 (define_attr "cc" "none,set_czn,set_zn,set_n,compare,clobber,
-                   out_plus"
+                   out_plus, out_plus_noclobber"
   (const_string "none"))
 
 (define_attr "type" "branch,branch1,arith,xcall"
@@ -125,7 +125,8 @@ (define_attr "length" ""
 ;; Otherwise do special processing depending on the attribute.
 
 (define_attr "adjust_len"
-  "out_bitop, out_plus, addto_sp, tsthi, tstsi, compare, call,
+  "out_bitop, out_plus, out_plus_noclobber, addto_sp,
+   tsthi, tstsi, compare, call,
    mov8, mov16, mov32, reload_in16, reload_in32,
    ashlqi, ashrqi, lshrqi,
    ashlhi, ashrhi, lshrhi,
@@ -759,14 +760,23 @@ (define_expand "addhi3"
 	(plus:HI (match_operand:HI 1 "register_operand" "")
 		 (match_operand:HI 2 "nonmemory_operand" "")))]
   ""
-  "
-{
-  if (GET_CODE (operands[2]) == CONST_INT)
-    {
-      short tmp = INTVAL (operands[2]);
-      operands[2] = GEN_INT(tmp);
-    }
-}")
+  {
+    if (CONST_INT_P (operands[2]))
+      {
+        operands[2] = gen_int_mode (INTVAL (operands[2]), HImode);
+
+        if (!reload_completed
+            && !reload_in_progress
+            && !stack_register_operand (operands[0], HImode)
+            && !stack_register_operand (operands[1], HImode)
+            && !d_register_operand (operands[0], HImode)
+            && !d_register_operand (operands[1], HImode))
+          {
+            emit_insn (gen_addhi3_clobber (operands[0], operands[1], operands[2]));
+            DONE;
+          }
+      }
+  })
 
 
 (define_insn "*addhi3_zero_extend"
@@ -803,20 +813,77 @@ (define_insn "*addhi3_sp_R"
    (set_attr "adjust_len" "addto_sp")])
 
 (define_insn "*addhi3"
-  [(set (match_operand:HI 0 "register_operand" "=r,!w,!w,d,r,r")
- 	(plus:HI
- 	 (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0")
- 	 (match_operand:HI 2 "nonmemory_operand" "r,I,J,i,P,N")))]
+  [(set (match_operand:HI 0 "register_operand"          "=r,d,d")
+        (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0")
+                 (match_operand:HI 2 "nonmemory_operand" "r,s,n")))]
   ""
-  "@
- 	add %A0,%A2\;adc %B0,%B2
- 	adiw %A0,%2
- 	sbiw %A0,%n2
- 	subi %A0,lo8(-(%2))\;sbci %B0,hi8(-(%2))
- 	sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__
- 	sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__"
-  [(set_attr "length" "2,1,1,2,3,3")
-   (set_attr "cc" "set_n,set_czn,set_czn,set_czn,set_n,set_n")])
+  {
+    static const char * const asm_code[] =
+      {
+        "add %A0,%A2\;adc %B0,%B2",
+        "subi %A0,lo8(-(%2))\;sbci %B0,hi8(-(%2))",
+        ""
+      };
+
+    if (*asm_code[which_alternative])
+      return asm_code[which_alternative];
+
+    return avr_out_plus_noclobber (operands, NULL, NULL);
+  }
+  [(set_attr "length" "2,2,2")
+   (set_attr "adjust_len" "*,*,out_plus_noclobber")
+   (set_attr "cc" "set_n,set_czn,out_plus_noclobber")])
+
+;; Adding a constant to NO_LD_REGS might have lead to a reload of
+;; that constant to LD_REGS.  We don't add a scratch to *addhi3
+;; itself because that insn is special to reload.
+
+(define_peephole2 ; *addhi3_clobber
+  [(set (match_operand:HI 0 "d_register_operand" "")
+        (match_operand:HI 1 "const_int_operand" ""))
+   (set (match_operand:HI 2 "l_register_operand" "")
+	(plus:HI (match_dup 2)
+                 (match_dup 0)))]
+  "peep2_reg_dead_p (2, operands[0])"
+  [(parallel [(set (match_dup 2)
+                   (plus:HI (match_dup 2)
+                            (match_dup 1)))
+              (clobber (match_dup 3))])]
+  {
+    operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, 0);
+  })
+
+;; Same, but with reload to NO_LD_REGS
+;; Combine *reload_inhi with *addhi3
+
+(define_peephole2 ; *addhi3_clobber
+  [(parallel [(set (match_operand:HI 0 "l_register_operand" "")
+                   (match_operand:HI 1 "const_int_operand" ""))
+              (clobber (match_operand:QI 2 "d_register_operand" ""))])
+   (set (match_operand:HI 3 "l_register_operand" "")
+	(plus:HI (match_dup 3)
+                 (match_dup 0)))]
+  "peep2_reg_dead_p (2, operands[0])"
+  [(parallel [(set (match_dup 3)
+                   (plus:HI (match_dup 3)
+                            (match_dup 1)))
+              (clobber (match_dup 2))])])
+
+(define_insn "addhi3_clobber"
+  [(set (match_operand:HI 0 "register_operand"           "=d,l")
+ 	(plus:HI (match_operand:HI 1 "register_operand"  "%0,0")
+                 (match_operand:HI 2 "const_int_operand"  "n,n")))
+   (clobber (match_scratch:QI 3                          "=X,&d"))]
+  ""
+  {
+    gcc_assert (REGNO (operands[0]) == REGNO (operands[1]));
+    
+    return avr_out_plus (operands, NULL, NULL);
+  }
+  [(set_attr "length" "4")
+   (set_attr "adjust_len" "out_plus")
+   (set_attr "cc" "out_plus")])
+
 
 (define_insn "addsi3"
   [(set (match_operand:SI 0 "register_operand"          "=r,d ,d,r")
Index: config/avr/avr-protos.h
===================================================================
--- config/avr/avr-protos.h	(revision 180102)
+++ config/avr/avr-protos.h	(working copy)
@@ -83,6 +83,7 @@ extern void avr_output_addr_vec_elt (FIL
 extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]);
 extern const char* avr_out_bitop (rtx, rtx*, int*);
 extern const char* avr_out_plus (rtx*, int*, int*);
+extern const char* avr_out_plus_noclobber (rtx*, int*, int*);
 extern const char* avr_out_addto_sp (rtx*, int*);
 extern bool avr_popcount_each_byte (rtx, int, int);
 
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 180104)
+++ config/avr/avr.c	(working copy)
@@ -1051,9 +1051,10 @@ expand_epilogue (bool sibcall_p)
       if (frame_pointer_needed)
 	{
           /*  Get rid of frame.  */
-	  emit_move_insn(frame_pointer_rtx,
-                         gen_rtx_PLUS (HImode, frame_pointer_rtx,
-                                       gen_int_mode (size, HImode)));
+          if (size)
+            emit_move_insn (frame_pointer_rtx,
+                            gen_rtx_PLUS (HImode, frame_pointer_rtx,
+                                          gen_int_mode (size, HImode)));
 	}
       else
 	{
@@ -1682,14 +1683,19 @@ notice_update_cc (rtx body ATTRIBUTE_UNU
       break;
 
     case CC_OUT_PLUS:
+    case CC_OUT_PLUS_NOCLOBBER:
       {
         rtx *op = recog_data.operand;
         int len_dummy, icc;
         
         /* Extract insn's operands.  */
         extract_constrain_insn_cached (insn);
+
+        if (CC_OUT_PLUS == cc)
+          avr_out_plus (op, &len_dummy, &icc);
+        else
+          avr_out_plus_noclobber (op, &len_dummy, &icc);
         
-        avr_out_plus (op, &len_dummy, &icc);
         cc = (enum attr_cc) icc;
         
         break;
@@ -4773,7 +4779,8 @@ avr_out_plus_1 (rtx *xop, int *plen, enu
   /* Value to add.  There are two ways to add VAL: R += VAL and R -= -VAL.  */
   rtx xval = xop[2];
 
-  /* Addition does not set cc0 in a usable way.  */
+  /* Except in the case of ADIW with 16-bit register (see below)
+     addition does not set cc0 in a usable way.  */
   
   *pcc = (MINUS == code) ? CC_SET_CZN : CC_CLOBBER;
 
@@ -4821,6 +4828,9 @@ avr_out_plus_1 (rtx *xop, int *plen, enu
                   started = true;
                   avr_asm_len (code == PLUS ? "adiw %0,%1" : "sbiw %0,%1",
                                op, plen, 1);
+
+                  if (n_bytes == 2 && PLUS == code)
+                      *pcc = CC_SET_ZN;
                 }
 
               i++;
@@ -4836,6 +4846,14 @@ avr_out_plus_1 (rtx *xop, int *plen, enu
                          op, plen, 1);
           continue;
         }
+      else if ((val8 == 1 || val8 == 0xff)
+               && !started
+               && i == n_bytes - 1)
+      {
+          avr_asm_len ((code == PLUS) ^ (val8 == 1) ? "dec %0" : "inc %0",
+                       op, plen, 1);
+          break;
+      }
 
       switch (code)
         {
@@ -4924,6 +4942,22 @@ avr_out_plus (rtx *xop, int *plen, int *
 }
 
 
+/* Same as above but XOP has just 3 entries.
+   Supply a dummy 4th operand.  */
+
+const char*
+avr_out_plus_noclobber (rtx *xop, int *plen, int *pcc)
+{
+  rtx op[4];
+
+  op[0] = xop[0];
+  op[1] = xop[1];
+  op[2] = xop[2];
+  op[3] = NULL_RTX;
+
+  return avr_out_plus (op, plen, pcc);
+}
+
 /* Output bit operation (IOR, AND, XOR) with register XOP[0] and compile
    time constant XOP[2]:
 
@@ -5308,6 +5342,8 @@ adjust_insn_length (rtx insn, int len)
     case ADJUST_LEN_OUT_BITOP: avr_out_bitop (insn, op, &len); break;
       
     case ADJUST_LEN_OUT_PLUS: avr_out_plus (op, &len, NULL); break;
+    case ADJUST_LEN_OUT_PLUS_NOCLOBBER:
+      avr_out_plus_noclobber (op, &len, NULL); break;
 
     case ADJUST_LEN_ADDTO_SP: avr_out_addto_sp (op, &len); break;
       

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 12:22 [Patch,AVR]: PR50447: Tweak addhi3 Georg-Johann Lay
@ 2011-10-18 12:39 ` Denis Chertykov
  2011-10-18 13:51   ` Georg-Johann Lay
  0 siblings, 1 reply; 11+ messages in thread
From: Denis Chertykov @ 2011-10-18 12:39 UTC (permalink / raw)
  To: Georg-Johann Lay; +Cc: gcc-patches, Eric Weddington

2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
> This patch do some tweaks to addhi3 like adding QI scratch register.
>
> The original *addhi3 insn is still there and located prior to new
> addhi3_clobber insn because addhi3 is special to reload (thanks Danis for this
> note) so that there is a version with and a version without scratch register.
>
> Patch passes without regressions.
>

Which improvements added by this patch ?

Denis.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 12:39 ` Denis Chertykov
@ 2011-10-18 13:51   ` Georg-Johann Lay
  2011-10-18 14:23     ` Denis Chertykov
  0 siblings, 1 reply; 11+ messages in thread
From: Georg-Johann Lay @ 2011-10-18 13:51 UTC (permalink / raw)
  To: Denis Chertykov; +Cc: gcc-patches, Eric Weddington

Denis Chertykov schrieb:
> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>> This patch do some tweaks to addhi3 like adding QI scratch register.
>>
>> The original *addhi3 insn is still there and located prior to new
>> addhi3_clobber insn because addhi3 is special to reload (thanks Danis for this
>> note) so that there is a version with and a version without scratch register.
>>
>> Patch passes without regressions.
>>
> 
> Which improvements added by this patch ?
> 
> Denis.

If the addhi3 is expanded early, the addition happens with QI scratch which
avoids reload of constant if target register is in NO_LD. And reduce register
pressure as only QI is needed and not reload of constant to HI.

Otherwise, there might be sequences like

ldi r31, 2    ; *reload_inhi
mov r12, r31
clr r13

add r14, r12  ; *addhi3
adc r15, r13

which now will be

ldi r31, 2    ; addhi3_clobber
add r14, r31
adc r15, __zero_reg__

Similar applies if the reload of the constant happens to LD regs:

ldi r30, 2    ; *movhi
clr r31

add r14, r12  ; *addhi3
adc r15, r13

will become

ldi r30, 2    ; addhi3_clobber
add r14, r30
adc r15, __zero_reg__

For *addhi3 insns the register pressure is not reduced but the insn sequence
might be smarter if peep2 comes up with a QI scratch or if it detects a
*reload_inhi insn just prior to the addition (and the reg that holds the
reloaded constant dies after the addition).

As *addhi3 is special to reload, there is still an "ordinary" add addhi insn
without scratch. This is easier because, e.g. prologue and epilogue generation
generate add insns (not by means of addhi3 expander but by explicit
gan_rtx_PLUS). Yet the addhi3 expander factors out the situations when an
addhi3 insn is to be generated via addhi3 expander late in the compilation process.

Johann

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 13:51   ` Georg-Johann Lay
@ 2011-10-18 14:23     ` Denis Chertykov
  2011-10-18 17:37       ` Georg-Johann Lay
  0 siblings, 1 reply; 11+ messages in thread
From: Denis Chertykov @ 2011-10-18 14:23 UTC (permalink / raw)
  To: Georg-Johann Lay; +Cc: gcc-patches, Eric Weddington

2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
> Denis Chertykov schrieb:
>> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>>> This patch do some tweaks to addhi3 like adding QI scratch register.
>>>
>>> The original *addhi3 insn is still there and located prior to new
>>> addhi3_clobber insn because addhi3 is special to reload (thanks Danis for this
>>> note) so that there is a version with and a version without scratch register.
>>>
>>> Patch passes without regressions.
>>>
>>
>> Which improvements added by this patch ?
>>
>> Denis.
>
> If the addhi3 is expanded early, the addition happens with QI scratch which
> avoids reload of constant if target register is in NO_LD. And reduce register
> pressure as only QI is needed and not reload of constant to HI.
>
> Otherwise, there might be sequences like
>
> ldi r31, 2    ; *reload_inhi
> mov r12, r31
> clr r13
>
> add r14, r12  ; *addhi3
> adc r15, r13
>
> which now will be
>
> ldi r31, 2    ; addhi3_clobber
> add r14, r31
> adc r15, __zero_reg__
>
> Similar applies if the reload of the constant happens to LD regs:
>
> ldi r30, 2    ; *movhi
> clr r31
>
> add r14, r12  ; *addhi3
> adc r15, r13
>
> will become
>
> ldi r30, 2    ; addhi3_clobber
> add r14, r30
> adc r15, __zero_reg__
>
> For *addhi3 insns the register pressure is not reduced but the insn sequence
> might be smarter if peep2 comes up with a QI scratch or if it detects a
> *reload_inhi insn just prior to the addition (and the reg that holds the
> reloaded constant dies after the addition).
>
> As *addhi3 is special to reload, there is still an "ordinary" add addhi insn
> without scratch. This is easier because, e.g. prologue and epilogue generation
> generate add insns (not by means of addhi3 expander but by explicit
> gan_rtx_PLUS). Yet the addhi3 expander factors out the situations when an
> addhi3 insn is to be generated via addhi3 expander late in the compilation process

Please provide any real world example.

Denis.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 14:23     ` Denis Chertykov
@ 2011-10-18 17:37       ` Georg-Johann Lay
  2011-10-18 18:25         ` Denis Chertykov
  0 siblings, 1 reply; 11+ messages in thread
From: Georg-Johann Lay @ 2011-10-18 17:37 UTC (permalink / raw)
  To: Denis Chertykov; +Cc: gcc-patches, Eric Weddington

[-- Attachment #1: Type: text/plain, Size: 3167 bytes --]

Denis Chertykov schrieb:
> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>> Denis Chertykov schrieb:
>>> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>>>> This patch do some tweaks to addhi3 like adding QI scratch register.
>>>>
>>>> The original *addhi3 insn is still there and located prior to new
>>>> addhi3_clobber insn because addhi3 is special to reload (thanks Danis for this
>>>> note) so that there is a version with and a version without scratch register.
>>>>
>>>> Patch passes without regressions.
>>>>
>>> Which improvements added by this patch ?
>>>
>>> Denis.
>> If the addhi3 is expanded early, the addition happens with QI scratch which
>> avoids reload of constant if target register is in NO_LD. And reduce register
>> pressure as only QI is needed and not reload of constant to HI.
>>
>> Otherwise, there might be sequences like
>>
>> ldi r31, 2    ; *reload_inhi
>> mov r12, r31
>> clr r13
>>
>> add r14, r12  ; *addhi3
>> adc r15, r13
>>
>> which now will be
>>
>> ldi r31, 2    ; addhi3_clobber
>> add r14, r31
>> adc r15, __zero_reg__
>>
>> Similar applies if the reload of the constant happens to LD regs:
>>
>> ldi r30, 2    ; *movhi
>> clr r31
>>
>> add r14, r12  ; *addhi3
>> adc r15, r13
>>
>> will become
>>
>> ldi r30, 2    ; addhi3_clobber
>> add r14, r30
>> adc r15, __zero_reg__
>>
>> For *addhi3 insns the register pressure is not reduced but the insn sequence
>> might be smarter if peep2 comes up with a QI scratch or if it detects a
>> *reload_inhi insn just prior to the addition (and the reg that holds the
>> reloaded constant dies after the addition).
>>
>> As *addhi3 is special to reload, there is still an "ordinary" add addhi insn
>> without scratch. This is easier because, e.g. prologue and epilogue generation
>> generate add insns (not by means of addhi3 expander but by explicit
>> gan_rtx_PLUS). Yet the addhi3 expander factors out the situations when an
>> addhi3 insn is to be generated via addhi3 expander late in the compilation process
> 
> Please provide any real world example.
> 
> Denis.

Consider avr-libc (under the assumption that it is "real world" code):

In avr-libc's build directory, and with the patch integrated:

$ cd avr/lib/avr4
$ make clean && make CFLAGS='-save-temps -dp -Os'
$ grep -A 2 'addhi3_clobber\/2' *.s > out-nopeep2.txt (see attachment)
$ grep 'addhi3_clobber\/2' *.s | wc -l
33

This shows that the insns are already there before peep2 and thus no reload of
16-bit constant is needed; an 8-bit scratch is sufficient.

Alternatively, the implementation could omit the expansion to addhi3_clobber in
addhi3 expander and instead rely completely on peep2. However, that does not
reduce register pressure because a 16-bit register will be allocated and the
peep2 just prints things smarter and needs just a QI scratch to call
avr_out_plus_clobber.

For +/-1, the addition with SEC/ADD/ADC resp. SEC/SBC/SBC leaves cc0 in a mess.
 as most loops use +/-1 on the counter variable, LDI/SUB/SBC is not shorter but
better because it sets cc0.

So you like this patch?
Or prefer a patch that is neutral with respect to register allocator and just
uses peep2 to print things smarter?

Johann



[-- Attachment #2: out-nopeep2.txt --]
[-- Type: text/plain, Size: 4567 bytes --]

dtoa_prf.s:	ldi r31,3	 ; ,	 ;  338	addhi3_clobber/2	[length = 3]
dtoa_prf.s-	add r12,r31	 ;  s,
dtoa_prf.s-	adc r13,__zero_reg__	 ;  s
--
dtoa_prf.s:	ldi r31,3	 ; ,	 ;  447	addhi3_clobber/2	[length = 3]
dtoa_prf.s-	add r12,r31	 ;  s,
dtoa_prf.s-	adc r13,__zero_reg__	 ;  s
--
fgets.s:	ldi r31,1	 ; ,	 ;  70	addhi3_clobber/2	[length = 3]
fgets.s-	sub r14,r31	 ;  ivtmp.9,
fgets.s-	sbc r15,__zero_reg__	 ;  ivtmp.9
--
realloc.s:	ldi r17,2	 ; ,	 ;  80	addhi3_clobber/2	[length = 3]
realloc.s-	add r12,r17	 ;  tmp83,
realloc.s-	adc r13,__zero_reg__	 ; 
--
realloc.s:	ldi r18,2	 ; ,	 ;  85	addhi3_clobber/2	[length = 3]
realloc.s-	add r12,r18	 ;  tmp84,
realloc.s-	adc r13,__zero_reg__	 ; 
--
strtod.s:	ldi r31,1	 ; ,	 ;  101	addhi3_clobber/2	[length = 3]
strtod.s-	sub r14,r31	 ;  D.2581,
strtod.s-	sbc r15,__zero_reg__	 ;  D.2581
--
strtod.s:	ldi r18,2	 ; ,	 ;  110	addhi3_clobber/2	[length = 3]
strtod.s-	add r14,r18	 ;  nptr,
strtod.s-	adc r15,__zero_reg__	 ;  nptr
--
strtod.s:	ldi r21,7	 ; ,	 ;  120	addhi3_clobber/2	[length = 3]
strtod.s-	add r14,r21	 ;  nptr,
strtod.s-	adc r15,__zero_reg__	 ;  nptr
--
strtod.s:	ldi r31,255	 ; ,	 ;  175	addhi3_clobber/2	[length = 3]
strtod.s-	sub r14,r31	 ;  exp,
strtod.s-	sbc r15,r31	 ;  exp,
--
strtod.s:	ldi r18,1	 ; ,	 ;  185	addhi3_clobber/2	[length = 3]
strtod.s-	sub r14,r18	 ;  exp,
strtod.s-	sbc r15,__zero_reg__	 ;  exp
--
strtod.s:	ldi r31,24	 ; ,	 ;  376	addhi3_clobber/2	[length = 3]
strtod.s-	sub r8,r31	 ;  D.2735,
strtod.s-	sbc r9,__zero_reg__	 ;  D.2735
--
strtol.s:	ldi r31,2	 ; ,	 ;  128	addhi3_clobber/2	[length = 3]
strtol.s-	add r6,r31	 ;  nptr,
strtol.s-	adc r7,__zero_reg__	 ;  nptr
--
strtol.s:	ldi r31,1	 ; ,	 ;  242	addhi3_clobber/2	[length = 3]
strtol.s-	sub r6,r31	 ;  tmp117,
strtol.s-	sbc r7,__zero_reg__	 ; 
--
strtol.s:	ldi r31,2	 ; ,	 ;  252	addhi3_clobber/2	[length = 3]
strtol.s-	sub r6,r31	 ;  tmp119,
strtol.s-	sbc r7,__zero_reg__	 ; 
--
strtoul.s:	ldi r31,2	 ; ,	 ;  126	addhi3_clobber/2	[length = 3]
strtoul.s-	add r14,r31	 ;  nptr,
strtoul.s-	adc r15,__zero_reg__	 ;  nptr
--
strtoul.s:	ldi r31,1	 ; ,	 ;  229	addhi3_clobber/2	[length = 3]
strtoul.s-	sub r14,r31	 ;  tmp113,
strtoul.s-	sbc r15,__zero_reg__	 ; 
--
strtoul.s:	ldi r31,2	 ; ,	 ;  239	addhi3_clobber/2	[length = 3]
strtoul.s-	sub r14,r31	 ;  tmp115,
strtoul.s-	sbc r15,__zero_reg__	 ; 
--
vfprintf.s:	ldi r24,4	 ; ,	 ;  399	addhi3_clobber/2	[length = 3]
vfprintf.s-	add r4,r24	 ;  ap,
vfprintf.s-	adc r5,__zero_reg__	 ;  ap
--
vfprintf.s:	ldi r21,10	 ; ,	 ;  850	addhi3_clobber/2	[length = 3]
vfprintf.s-	sub r10,r21	 ;  exp,
vfprintf.s-	sbc r11,__zero_reg__	 ;  exp
--
vfprintf.s:	ldi r30,2	 ; ,	 ;  882	addhi3_clobber/2	[length = 3]
vfprintf.s-	add r4,r30	 ;  ap,
vfprintf.s-	adc r5,__zero_reg__	 ;  ap
--
vfprintf.s:	ldi r31,2	 ; ,	 ;  892	addhi3_clobber/2	[length = 3]
vfprintf.s-	add r4,r31	 ;  ap,
vfprintf.s-	adc r5,__zero_reg__	 ;  ap
--
vfprintf.s:	ldi r31,2	 ; ,	 ;  919	addhi3_clobber/2	[length = 3]
vfprintf.s-	add r4,r31	 ;  ap,
vfprintf.s-	adc r5,__zero_reg__	 ;  ap
--
vfprintf.s:	ldi r31,1	 ; ,	 ;  987	addhi3_clobber/2	[length = 3]
vfprintf.s-	sub r8,r31	 ;  size,
vfprintf.s-	sbc r9,__zero_reg__	 ;  size
--
vfprintf.s:	ldi r18,4	 ; ,	 ;  1012	addhi3_clobber/2	[length = 3]
vfprintf.s-	add r4,r18	 ;  ap,
vfprintf.s-	adc r5,__zero_reg__	 ;  ap
--
vfprintf.s:	ldi r31,2	 ; ,	 ;  1019	addhi3_clobber/2	[length = 3]
vfprintf.s-	add r4,r31	 ;  ap,
vfprintf.s-	adc r5,__zero_reg__	 ;  ap
--
vfprintf.s:	ldi r30,4	 ; ,	 ;  1109	addhi3_clobber/2	[length = 3]
vfprintf.s-	add r4,r30	 ;  ap,
vfprintf.s-	adc r5,__zero_reg__	 ;  ap
--
vfprintf.s:	ldi r31,2	 ; ,	 ;  1116	addhi3_clobber/2	[length = 3]
vfprintf.s-	add r4,r31	 ;  ap,
vfprintf.s-	adc r5,__zero_reg__	 ;  ap
--
vfscanf.s:	ldi r27,1	 ; ,	 ;  213	addhi3_clobber/2	[length = 3]
vfscanf.s-	sub r10,r27	 ;  width,
vfscanf.s-	sbc r11,__zero_reg__	 ;  width
--
vfscanf.s:	ldi r25,255	 ; ,	 ;  163	addhi3_clobber/2	[length = 3]
vfscanf.s-	sub r12,r25	 ;  exp,
vfscanf.s-	sbc r13,r25	 ;  exp,
--
vfscanf.s:	ldi r30,1	 ; ,	 ;  173	addhi3_clobber/2	[length = 3]
vfscanf.s-	sub r12,r30	 ;  exp,
vfscanf.s-	sbc r13,__zero_reg__	 ;  exp
--
vfscanf.s:	ldi r25,24	 ; ,	 ;  354	addhi3_clobber/2	[length = 3]
vfscanf.s-	sub r6,r25	 ;  D.3471,
vfscanf.s-	sbc r7,__zero_reg__	 ;  D.3471
--
vfscanf.s:	ldi r31,1	 ; ,	 ;  235	addhi3_clobber/2	[length = 3]
vfscanf.s-	sub r12,r31	 ;  width,
vfscanf.s-	sbc r13,__zero_reg__	 ;  width
--
vfscanf.s:	ldi r31,1	 ; ,	 ;  334	addhi3_clobber/2	[length = 3]
vfscanf.s-	sub r12,r31	 ;  width,
vfscanf.s-	sbc r13,__zero_reg__	 ;  width

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 17:37       ` Georg-Johann Lay
@ 2011-10-18 18:25         ` Denis Chertykov
  2011-10-18 18:28           ` Denis Chertykov
  2011-10-18 19:59           ` Georg-Johann Lay
  0 siblings, 2 replies; 11+ messages in thread
From: Denis Chertykov @ 2011-10-18 18:25 UTC (permalink / raw)
  To: Georg-Johann Lay; +Cc: gcc-patches, Eric Weddington

2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
> Denis Chertykov schrieb:
>> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>>> Denis Chertykov schrieb:
>>>> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>>>>> This patch do some tweaks to addhi3 like adding QI scratch register.
>>>>>
>>>>> The original *addhi3 insn is still there and located prior to new
>>>>> addhi3_clobber insn because addhi3 is special to reload (thanks Danis for this
>>>>> note) so that there is a version with and a version without scratch register.
>>>>>
>>>>> Patch passes without regressions.
>>>>>
>>>> Which improvements added by this patch ?
>>>>
>>>> Denis.
>>> If the addhi3 is expanded early, the addition happens with QI scratch which
>>> avoids reload of constant if target register is in NO_LD. And reduce register
>>> pressure as only QI is needed and not reload of constant to HI.
>>>
>>> Otherwise, there might be sequences like
>>>
>>> ldi r31, 2    ; *reload_inhi
>>> mov r12, r31
>>> clr r13
>>>
>>> add r14, r12  ; *addhi3
>>> adc r15, r13
>>>
>>> which now will be
>>>
>>> ldi r31, 2    ; addhi3_clobber
>>> add r14, r31
>>> adc r15, __zero_reg__
>>>
>>> Similar applies if the reload of the constant happens to LD regs:
>>>
>>> ldi r30, 2    ; *movhi
>>> clr r31
>>>
>>> add r14, r12  ; *addhi3
>>> adc r15, r13
>>>
>>> will become
>>>
>>> ldi r30, 2    ; addhi3_clobber
>>> add r14, r30
>>> adc r15, __zero_reg__
>>>
>>> For *addhi3 insns the register pressure is not reduced but the insn sequence
>>> might be smarter if peep2 comes up with a QI scratch or if it detects a
>>> *reload_inhi insn just prior to the addition (and the reg that holds the
>>> reloaded constant dies after the addition).
>>>
>>> As *addhi3 is special to reload, there is still an "ordinary" add addhi insn
>>> without scratch. This is easier because, e.g. prologue and epilogue generation
>>> generate add insns (not by means of addhi3 expander but by explicit
>>> gan_rtx_PLUS). Yet the addhi3 expander factors out the situations when an
>>> addhi3 insn is to be generated via addhi3 expander late in the compilation process
>>
>> Please provide any real world example.
>>
>> Denis.
>
> Consider avr-libc (under the assumption that it is "real world" code):
>
> In avr-libc's build directory, and with the patch integrated:
>
> $ cd avr/lib/avr4
> $ make clean && make CFLAGS='-save-temps -dp -Os'
> $ grep -A 2 'addhi3_clobber\/2' *.s > out-nopeep2.txt (see attachment)
> $ grep 'addhi3_clobber\/2' *.s | wc -l
> 33
>
> This shows that the insns are already there before peep2 and thus no reload of
> 16-bit constant is needed; an 8-bit scratch is sufficient.
>
> Alternatively, the implementation could omit the expansion to addhi3_clobber in
> addhi3 expander and instead rely completely on peep2. However, that does not
> reduce register pressure because a 16-bit register will be allocated and the
> peep2 just prints things smarter and needs just a QI scratch to call
> avr_out_plus_clobber.
>
> For +/-1, the addition with SEC/ADD/ADC resp. SEC/SBC/SBC leaves cc0 in a mess.
>  as most loops use +/-1 on the counter variable, LDI/SUB/SBC is not shorter but
> better because it sets cc0.
>
> So you like this patch?
> Or prefer a patch that is neutral with respect to register allocator and just
> uses peep2 to print things smarter?

I'm interested in code improvements.
What difference in size of avr-libc ?

Denis.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 18:25         ` Denis Chertykov
@ 2011-10-18 18:28           ` Denis Chertykov
  2011-10-18 19:59           ` Georg-Johann Lay
  1 sibling, 0 replies; 11+ messages in thread
From: Denis Chertykov @ 2011-10-18 18:28 UTC (permalink / raw)
  To: Georg-Johann Lay; +Cc: gcc-patches, Eric Weddington

Just a note.

Instead of `!reload_completed && !reload_in_progress'you can use
`can_create_pseudo_p()' declared in rtl.h

Denis.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 18:25         ` Denis Chertykov
  2011-10-18 18:28           ` Denis Chertykov
@ 2011-10-18 19:59           ` Georg-Johann Lay
  2011-10-18 22:52             ` Georg-Johann Lay
  1 sibling, 1 reply; 11+ messages in thread
From: Georg-Johann Lay @ 2011-10-18 19:59 UTC (permalink / raw)
  To: Denis Chertykov; +Cc: gcc-patches, Eric Weddington

[-- Attachment #1: Type: text/plain, Size: 4105 bytes --]

Denis Chertykov schrieb:
> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>> Denis Chertykov schrieb:
>>> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>>>> Denis Chertykov schrieb:
>>>>> 2011/10/18 Georg-Johann Lay <avr@gjlay.de>:
>>>>>> This patch do some tweaks to addhi3 like adding QI scratch register.
>>>>>>
>>>>>> The original *addhi3 insn is still there and located prior to new
>>>>>> addhi3_clobber insn because addhi3 is special to reload (thanks Danis for this
>>>>>> note) so that there is a version with and a version without scratch register.
>>>>>>
>>>>>> Patch passes without regressions.
>>>>>>
>>>>> Which improvements added by this patch ?
>>>>>
>>>>> Denis.
>>>> If the addhi3 is expanded early, the addition happens with QI scratch which
>>>> avoids reload of constant if target register is in NO_LD. And reduce register
>>>> pressure as only QI is needed and not reload of constant to HI.
>>>>
>>>> Otherwise, there might be sequences like
>>>>
>>>> ldi r31, 2    ; *reload_inhi
>>>> mov r12, r31
>>>> clr r13
>>>>
>>>> add r14, r12  ; *addhi3
>>>> adc r15, r13
>>>>
>>>> which now will be
>>>>
>>>> ldi r31, 2    ; addhi3_clobber
>>>> add r14, r31
>>>> adc r15, __zero_reg__
>>>>
>>>> Similar applies if the reload of the constant happens to LD regs:
>>>>
>>>> ldi r30, 2    ; *movhi
>>>> clr r31
>>>>
>>>> add r14, r12  ; *addhi3
>>>> adc r15, r13
>>>>
>>>> will become
>>>>
>>>> ldi r30, 2    ; addhi3_clobber
>>>> add r14, r30
>>>> adc r15, __zero_reg__
>>>>
>>>> For *addhi3 insns the register pressure is not reduced but the insn sequence
>>>> might be smarter if peep2 comes up with a QI scratch or if it detects a
>>>> *reload_inhi insn just prior to the addition (and the reg that holds the
>>>> reloaded constant dies after the addition).
>>>>
>>>> As *addhi3 is special to reload, there is still an "ordinary" add addhi insn
>>>> without scratch. This is easier because, e.g. prologue and epilogue generation
>>>> generate add insns (not by means of addhi3 expander but by explicit
>>>> gan_rtx_PLUS). Yet the addhi3 expander factors out the situations when an
>>>> addhi3 insn is to be generated via addhi3 expander late in the compilation process
>>> Please provide any real world example.
>>>
>>> Denis.
>> Consider avr-libc (under the assumption that it is "real world" code):
>>
>> In avr-libc's build directory, and with the patch integrated:
>>
>> $ cd avr/lib/avr4
>> $ make clean && make CFLAGS='-save-temps -dp -Os'
>> $ grep -A 2 'addhi3_clobber\/2' *.s > out-nopeep2.txt (see attachment)
>> $ grep 'addhi3_clobber\/2' *.s | wc -l
>> 33
>>
>> This shows that the insns are already there before peep2 and thus no reload of
>> 16-bit constant is needed; an 8-bit scratch is sufficient.
>>
>> Alternatively, the implementation could omit the expansion to addhi3_clobber in
>> addhi3 expander and instead rely completely on peep2. However, that does not
>> reduce register pressure because a 16-bit register will be allocated and the
>> peep2 just prints things smarter and needs just a QI scratch to call
>> avr_out_plus_clobber.
>>
>> For +/-1, the addition with SEC/ADD/ADC resp. SEC/SBC/SBC leaves cc0 in a mess.
>>  as most loops use +/-1 on the counter variable, LDI/SUB/SBC is not shorter but
>> better because it sets cc0.
>>
>> So you like this patch?
>> Or prefer a patch that is neutral with respect to register allocator and just
>> uses peep2 to print things smarter?
> 
> I'm interested in code improvements.
> What difference in size of avr-libc ?
> 
> Denis.

I have to tool for smart size analysis, so here is just a diff:

After rebuilding avr-libc with respective compiler version, did respectively:

$ find . -name 'lib[mc].a' -exec avr-size {} ';' > size-orig.txt
$ find . -name 'lib[mc].a' -exec avr-size {} ';' > size-patch.txt

and then

$ diff -U 0 size-orig.txt size-patch.txt > size.diff

As far as I can see, there is not a big gain but no object increases in size.

For some files like ./avr/lib/avr2/libc.a:dtoa_prf.o size gain is 3%.
For ./avr/lib/avr4/libc.a:vfprintf_std.o it's 1.7% and for others just one
instruction better.

Johann

[-- Attachment #2: size.diff --]
[-- Type: text/x-patch, Size: 11355 bytes --]

--- size-orig.txt	2011-10-18 19:59:52.000000000 +0200
+++ size-patch.txt	2011-10-18 19:50:59.000000000 +0200
@@ -7 +7 @@
-    750	      0	      0	    750	    2ee	dtoa_prf.o (ex ./avr/lib/avr51/libc.a)
+    724	      0	      0	    724	    2d4	dtoa_prf.o (ex ./avr/lib/avr51/libc.a)
@@ -11 +11 @@
-    722	      6	      0	    728	    2d8	malloc.o (ex ./avr/lib/avr51/libc.a)
+    720	      6	      0	    726	    2d6	malloc.o (ex ./avr/lib/avr51/libc.a)
@@ -15,2 +15,2 @@
-    510	      0	      0	    510	    1fe	realloc.o (ex ./avr/lib/avr51/libc.a)
-    747	      0	      0	    747	    2eb	strtod.o (ex ./avr/lib/avr51/libc.a)
+    506	      0	      0	    506	    1fa	realloc.o (ex ./avr/lib/avr51/libc.a)
+    739	      0	      0	    739	    2e3	strtod.o (ex ./avr/lib/avr51/libc.a)
@@ -18 +18 @@
-    536	      0	      0	    536	    218	strtoul.o (ex ./avr/lib/avr51/libc.a)
+    530	      0	      0	    530	    212	strtoul.o (ex ./avr/lib/avr51/libc.a)
@@ -246,2 +246,2 @@
-   1042	      0	      0	   1042	    412	vfprintf_std.o (ex ./avr/lib/avr51/libc.a)
-   1490	      0	      0	   1490	    5d2	vfscanf_std.o (ex ./avr/lib/avr51/libc.a)
+   1026	      0	      0	   1026	    402	vfprintf_std.o (ex ./avr/lib/avr51/libc.a)
+   1488	      0	      0	   1488	    5d0	vfscanf_std.o (ex ./avr/lib/avr51/libc.a)
@@ -423 +423 @@
-    688	      0	      0	    688	    2b0	dtoa_prf.o (ex ./avr/lib/avr35/libc.a)
+    670	      0	      0	    670	    29e	dtoa_prf.o (ex ./avr/lib/avr35/libc.a)
@@ -427 +427 @@
-    708	      6	      0	    714	    2ca	malloc.o (ex ./avr/lib/avr35/libc.a)
+    706	      6	      0	    712	    2c8	malloc.o (ex ./avr/lib/avr35/libc.a)
@@ -431,3 +431,3 @@
-    440	      0	      0	    440	    1b8	realloc.o (ex ./avr/lib/avr35/libc.a)
-    733	      0	      0	    733	    2dd	strtod.o (ex ./avr/lib/avr35/libc.a)
-    564	      0	      0	    564	    234	strtol.o (ex ./avr/lib/avr35/libc.a)
+    436	      0	      0	    436	    1b4	realloc.o (ex ./avr/lib/avr35/libc.a)
+    725	      0	      0	    725	    2d5	strtod.o (ex ./avr/lib/avr35/libc.a)
+    562	      0	      0	    562	    232	strtol.o (ex ./avr/lib/avr35/libc.a)
@@ -662,2 +662,2 @@
-    964	      0	      0	    964	    3c4	vfprintf_std.o (ex ./avr/lib/avr35/libc.a)
-   1352	      0	      0	   1352	    548	vfscanf_std.o (ex ./avr/lib/avr35/libc.a)
+    948	      0	      0	    948	    3b4	vfprintf_std.o (ex ./avr/lib/avr35/libc.a)
+   1350	      0	      0	   1350	    546	vfscanf_std.o (ex ./avr/lib/avr35/libc.a)
@@ -815 +815 @@
-    682	      0	      0	    682	    2aa	dtoa_prf.o (ex ./avr/lib/avr25/libc.a)
+    664	      0	      0	    664	    298	dtoa_prf.o (ex ./avr/lib/avr25/libc.a)
@@ -819 +819 @@
-    704	      6	      0	    710	    2c6	malloc.o (ex ./avr/lib/avr25/libc.a)
+    702	      6	      0	    708	    2c4	malloc.o (ex ./avr/lib/avr25/libc.a)
@@ -823,3 +823,3 @@
-    426	      0	      0	    426	    1aa	realloc.o (ex ./avr/lib/avr25/libc.a)
-    713	      0	      0	    713	    2c9	strtod.o (ex ./avr/lib/avr25/libc.a)
-    554	      0	      0	    554	    22a	strtol.o (ex ./avr/lib/avr25/libc.a)
+    422	      0	      0	    422	    1a6	realloc.o (ex ./avr/lib/avr25/libc.a)
+    705	      0	      0	    705	    2c1	strtod.o (ex ./avr/lib/avr25/libc.a)
+    552	      0	      0	    552	    228	strtol.o (ex ./avr/lib/avr25/libc.a)
@@ -1054,2 +1054,2 @@
-    930	      0	      0	    930	    3a2	vfprintf_std.o (ex ./avr/lib/avr25/libc.a)
-   1286	      0	      0	   1286	    506	vfscanf_std.o (ex ./avr/lib/avr25/libc.a)
+    914	      0	      0	    914	    392	vfprintf_std.o (ex ./avr/lib/avr25/libc.a)
+   1284	      0	      0	   1284	    504	vfscanf_std.o (ex ./avr/lib/avr25/libc.a)
@@ -1447 +1447 @@
-    758	      0	      0	    758	    2f6	dtoa_prf.o (ex ./avr/lib/avr31/libc.a)
+    734	      0	      0	    734	    2de	dtoa_prf.o (ex ./avr/lib/avr31/libc.a)
@@ -1451 +1451 @@
-    752	      6	      0	    758	    2f6	malloc.o (ex ./avr/lib/avr31/libc.a)
+    750	      6	      0	    756	    2f4	malloc.o (ex ./avr/lib/avr31/libc.a)
@@ -1455,4 +1455,4 @@
-    464	      0	      0	    464	    1d0	realloc.o (ex ./avr/lib/avr31/libc.a)
-    811	      0	      0	    811	    32b	strtod.o (ex ./avr/lib/avr31/libc.a)
-    634	      0	      0	    634	    27a	strtol.o (ex ./avr/lib/avr31/libc.a)
-    616	      0	      0	    616	    268	strtoul.o (ex ./avr/lib/avr31/libc.a)
+    466	      0	      0	    466	    1d2	realloc.o (ex ./avr/lib/avr31/libc.a)
+    809	      0	      0	    809	    329	strtod.o (ex ./avr/lib/avr31/libc.a)
+    630	      0	      0	    630	    276	strtol.o (ex ./avr/lib/avr31/libc.a)
+    614	      0	      0	    614	    266	strtoul.o (ex ./avr/lib/avr31/libc.a)
@@ -1686,2 +1686,2 @@
-   1064	      0	      0	   1064	    428	vfprintf_std.o (ex ./avr/lib/avr31/libc.a)
-   1582	      0	      0	   1582	    62e	vfscanf_std.o (ex ./avr/lib/avr31/libc.a)
+   1046	      0	      0	   1046	    416	vfprintf_std.o (ex ./avr/lib/avr31/libc.a)
+   1580	      0	      0	   1580	    62c	vfscanf_std.o (ex ./avr/lib/avr31/libc.a)
@@ -1791 +1791 @@
-    750	      0	      0	    750	    2ee	dtoa_prf.o (ex ./avr/lib/avr6/libc.a)
+    724	      0	      0	    724	    2d4	dtoa_prf.o (ex ./avr/lib/avr6/libc.a)
@@ -1795 +1795 @@
-    722	      6	      0	    728	    2d8	malloc.o (ex ./avr/lib/avr6/libc.a)
+    720	      6	      0	    726	    2d6	malloc.o (ex ./avr/lib/avr6/libc.a)
@@ -1799,2 +1799,2 @@
-    508	      0	      0	    508	    1fc	realloc.o (ex ./avr/lib/avr6/libc.a)
-    747	      0	      0	    747	    2eb	strtod.o (ex ./avr/lib/avr6/libc.a)
+    504	      0	      0	    504	    1f8	realloc.o (ex ./avr/lib/avr6/libc.a)
+    739	      0	      0	    739	    2e3	strtod.o (ex ./avr/lib/avr6/libc.a)
@@ -1802 +1802 @@
-    536	      0	      0	    536	    218	strtoul.o (ex ./avr/lib/avr6/libc.a)
+    530	      0	      0	    530	    212	strtoul.o (ex ./avr/lib/avr6/libc.a)
@@ -2030,2 +2030,2 @@
-   1042	      0	      0	   1042	    412	vfprintf_std.o (ex ./avr/lib/avr6/libc.a)
-   1490	      0	      0	   1490	    5d2	vfscanf_std.o (ex ./avr/lib/avr6/libc.a)
+   1026	      0	      0	   1026	    402	vfprintf_std.o (ex ./avr/lib/avr6/libc.a)
+   1488	      0	      0	   1488	    5d0	vfscanf_std.o (ex ./avr/lib/avr6/libc.a)
@@ -2135 +2135 @@
-    758	      0	      0	    758	    2f6	dtoa_prf.o (ex ./avr/lib/avr3/libc.a)
+    734	      0	      0	    734	    2de	dtoa_prf.o (ex ./avr/lib/avr3/libc.a)
@@ -2139 +2139 @@
-    752	      6	      0	    758	    2f6	malloc.o (ex ./avr/lib/avr3/libc.a)
+    750	      6	      0	    756	    2f4	malloc.o (ex ./avr/lib/avr3/libc.a)
@@ -2143,4 +2143,4 @@
-    464	      0	      0	    464	    1d0	realloc.o (ex ./avr/lib/avr3/libc.a)
-    811	      0	      0	    811	    32b	strtod.o (ex ./avr/lib/avr3/libc.a)
-    634	      0	      0	    634	    27a	strtol.o (ex ./avr/lib/avr3/libc.a)
-    616	      0	      0	    616	    268	strtoul.o (ex ./avr/lib/avr3/libc.a)
+    466	      0	      0	    466	    1d2	realloc.o (ex ./avr/lib/avr3/libc.a)
+    809	      0	      0	    809	    329	strtod.o (ex ./avr/lib/avr3/libc.a)
+    630	      0	      0	    630	    276	strtol.o (ex ./avr/lib/avr3/libc.a)
+    614	      0	      0	    614	    266	strtoul.o (ex ./avr/lib/avr3/libc.a)
@@ -2374,2 +2374,2 @@
-   1064	      0	      0	   1064	    428	vfprintf_std.o (ex ./avr/lib/avr3/libc.a)
-   1582	      0	      0	   1582	    62e	vfscanf_std.o (ex ./avr/lib/avr3/libc.a)
+   1046	      0	      0	   1046	    416	vfprintf_std.o (ex ./avr/lib/avr3/libc.a)
+   1580	      0	      0	   1580	    62c	vfscanf_std.o (ex ./avr/lib/avr3/libc.a)
@@ -2527 +2527 @@
-    688	      0	      0	    688	    2b0	dtoa_prf.o (ex ./avr/lib/avr5/libc.a)
+    670	      0	      0	    670	    29e	dtoa_prf.o (ex ./avr/lib/avr5/libc.a)
@@ -2531 +2531 @@
-    708	      6	      0	    714	    2ca	malloc.o (ex ./avr/lib/avr5/libc.a)
+    706	      6	      0	    712	    2c8	malloc.o (ex ./avr/lib/avr5/libc.a)
@@ -2535,2 +2535,2 @@
-    440	      0	      0	    440	    1b8	realloc.o (ex ./avr/lib/avr5/libc.a)
-    719	      0	      0	    719	    2cf	strtod.o (ex ./avr/lib/avr5/libc.a)
+    436	      0	      0	    436	    1b4	realloc.o (ex ./avr/lib/avr5/libc.a)
+    711	      0	      0	    711	    2c7	strtod.o (ex ./avr/lib/avr5/libc.a)
@@ -2538 +2538 @@
-    492	      0	      0	    492	    1ec	strtoul.o (ex ./avr/lib/avr5/libc.a)
+    486	      0	      0	    486	    1e6	strtoul.o (ex ./avr/lib/avr5/libc.a)
@@ -2766,2 +2766,2 @@
-    960	      0	      0	    960	    3c0	vfprintf_std.o (ex ./avr/lib/avr5/libc.a)
-   1352	      0	      0	   1352	    548	vfscanf_std.o (ex ./avr/lib/avr5/libc.a)
+    944	      0	      0	    944	    3b0	vfprintf_std.o (ex ./avr/lib/avr5/libc.a)
+   1350	      0	      0	   1350	    546	vfscanf_std.o (ex ./avr/lib/avr5/libc.a)
@@ -3855 +3855 @@
-    682	      0	      0	    682	    2aa	dtoa_prf.o (ex ./avr/lib/avr4/libc.a)
+    664	      0	      0	    664	    298	dtoa_prf.o (ex ./avr/lib/avr4/libc.a)
@@ -3859 +3859 @@
-    704	      6	      0	    710	    2c6	malloc.o (ex ./avr/lib/avr4/libc.a)
+    702	      6	      0	    708	    2c4	malloc.o (ex ./avr/lib/avr4/libc.a)
@@ -3863,2 +3863,2 @@
-    426	      0	      0	    426	    1aa	realloc.o (ex ./avr/lib/avr4/libc.a)
-    697	      0	      0	    697	    2b9	strtod.o (ex ./avr/lib/avr4/libc.a)
+    422	      0	      0	    422	    1a6	realloc.o (ex ./avr/lib/avr4/libc.a)
+    689	      0	      0	    689	    2b1	strtod.o (ex ./avr/lib/avr4/libc.a)
@@ -3866 +3866 @@
-    482	      0	      0	    482	    1e2	strtoul.o (ex ./avr/lib/avr4/libc.a)
+    476	      0	      0	    476	    1dc	strtoul.o (ex ./avr/lib/avr4/libc.a)
@@ -4094,2 +4094,2 @@
-    930	      0	      0	    930	    3a2	vfprintf_std.o (ex ./avr/lib/avr4/libc.a)
-   1286	      0	      0	   1286	    506	vfscanf_std.o (ex ./avr/lib/avr4/libc.a)
+    914	      0	      0	    914	    392	vfprintf_std.o (ex ./avr/lib/avr4/libc.a)
+   1284	      0	      0	   1284	    504	vfscanf_std.o (ex ./avr/lib/avr4/libc.a)
@@ -4379 +4379 @@
-    752	      0	      0	    752	    2f0	dtoa_prf.o (ex ./avr/lib/avr2/libc.a)
+    728	      0	      0	    728	    2d8	dtoa_prf.o (ex ./avr/lib/avr2/libc.a)
@@ -4383 +4383 @@
-    748	      6	      0	    754	    2f2	malloc.o (ex ./avr/lib/avr2/libc.a)
+    746	      6	      0	    752	    2f0	malloc.o (ex ./avr/lib/avr2/libc.a)
@@ -4387,4 +4387,4 @@
-    450	      0	      0	    450	    1c2	realloc.o (ex ./avr/lib/avr2/libc.a)
-    791	      0	      0	    791	    317	strtod.o (ex ./avr/lib/avr2/libc.a)
-    624	      0	      0	    624	    270	strtol.o (ex ./avr/lib/avr2/libc.a)
-    606	      0	      0	    606	    25e	strtoul.o (ex ./avr/lib/avr2/libc.a)
+    452	      0	      0	    452	    1c4	realloc.o (ex ./avr/lib/avr2/libc.a)
+    789	      0	      0	    789	    315	strtod.o (ex ./avr/lib/avr2/libc.a)
+    620	      0	      0	    620	    26c	strtol.o (ex ./avr/lib/avr2/libc.a)
+    604	      0	      0	    604	    25c	strtoul.o (ex ./avr/lib/avr2/libc.a)
@@ -4618,2 +4618,2 @@
-   1030	      0	      0	   1030	    406	vfprintf_std.o (ex ./avr/lib/avr2/libc.a)
-   1516	      0	      0	   1516	    5ec	vfscanf_std.o (ex ./avr/lib/avr2/libc.a)
+   1012	      0	      0	   1012	    3f4	vfprintf_std.o (ex ./avr/lib/avr2/libc.a)
+   1514	      0	      0	   1514	    5ea	vfscanf_std.o (ex ./avr/lib/avr2/libc.a)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 19:59           ` Georg-Johann Lay
@ 2011-10-18 22:52             ` Georg-Johann Lay
  2011-10-19 11:59               ` Denis Chertykov
  0 siblings, 1 reply; 11+ messages in thread
From: Georg-Johann Lay @ 2011-10-18 22:52 UTC (permalink / raw)
  To: gcc-patches; +Cc: Denis Chertykov, Eric Weddington

Georg-Johann Lay schrieb:
> Denis Chertykov schrieb:
> 
>>What difference in size of avr-libc ?
> 
> I have no tool for smart size analysis, so here is just a diff:
> 
> After rebuilding avr-libc with respective compiler version, did respectively:
> 
> $ find . -name 'lib[mc].a' -exec avr-size {} ';' > size-orig.txt
> $ find . -name 'lib[mc].a' -exec avr-size {} ';' > size-patch.txt
> 
> and then
> 
> $ diff -U 0 size-orig.txt size-patch.txt > size.diff
> 
> As far as I can see, there is not a big gain but no object increases in size.
> 
> For some files like ./avr/lib/avr2/libc.a:dtoa_prf.o size gain is 3%.
> For ./avr/lib/avr4/libc.a:vfprintf_std.o it's 1.7% and for others just one
> instruction better.

Actually there are some cases where the size increases by one instruction:

> -    464	      0	      0	    464	    1d0	realloc.o (ex ./avr/lib/avr31/libc.a)
> +    466	      0	      0	    466	    1d2	realloc.o (ex ./avr/lib/avr31/libc.a)

> -    464	      0	      0	    464	    1d0	realloc.o (ex ./avr/lib/avr3/libc.a)
> +    466	      0	      0	    466	    1d2	realloc.o (ex ./avr/lib/avr3/libc.a)

Will have a look tomorrow; presumably it's adding +/-1 that force a 
scratch whilst the old code does not.

Johann

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-18 22:52             ` Georg-Johann Lay
@ 2011-10-19 11:59               ` Denis Chertykov
  2011-10-19 15:24                 ` Georg-Johann Lay
  0 siblings, 1 reply; 11+ messages in thread
From: Denis Chertykov @ 2011-10-19 11:59 UTC (permalink / raw)
  To: Georg-Johann Lay; +Cc: gcc-patches, Eric Weddington

2011/10/19 Georg-Johann Lay <avr@gjlay.de>:
> Georg-Johann Lay schrieb:
>>
>> Denis Chertykov schrieb:
>>
>>> What difference in size of avr-libc ?
>>
>> I have no tool for smart size analysis, so here is just a diff:
>>
>> After rebuilding avr-libc with respective compiler version, did
>> respectively:
>>
>> $ find . -name 'lib[mc].a' -exec avr-size {} ';' > size-orig.txt
>> $ find . -name 'lib[mc].a' -exec avr-size {} ';' > size-patch.txt
>>
>> and then
>>
>> $ diff -U 0 size-orig.txt size-patch.txt > size.diff
>>
>> As far as I can see, there is not a big gain but no object increases in
>> size.
>>
>> For some files like ./avr/lib/avr2/libc.a:dtoa_prf.o size gain is 3%.
>> For ./avr/lib/avr4/libc.a:vfprintf_std.o it's 1.7% and for others just one
>> instruction better.
>
> Actually there are some cases where the size increases by one instruction:
>
>> -    464              0       0     464     1d0 realloc.o (ex
>> ./avr/lib/avr31/libc.a)
>> +    466              0       0     466     1d2 realloc.o (ex
>> ./avr/lib/avr31/libc.a)
>
>> -    464              0       0     464     1d0 realloc.o (ex
>> ./avr/lib/avr3/libc.a)
>> +    466              0       0     466     1d2 realloc.o (ex
>> ./avr/lib/avr3/libc.a)
>
> Will have a look tomorrow; presumably it's adding +/-1 that force a scratch
> whilst the old code does not.
>

However results are good.
The patch can be committed.

Denis.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Patch,AVR]: PR50447: Tweak addhi3
  2011-10-19 11:59               ` Denis Chertykov
@ 2011-10-19 15:24                 ` Georg-Johann Lay
  0 siblings, 0 replies; 11+ messages in thread
From: Georg-Johann Lay @ 2011-10-19 15:24 UTC (permalink / raw)
  To: Denis Chertykov; +Cc: gcc-patches, Eric Weddington

Denis Chertykov schrieb:
>> Georg-Johann Lay schrieb:
>>> Denis Chertykov schrieb:
>>>
>>>> What difference in size of avr-libc ?
>>> I have no tool for smart size analysis, so here is just a diff:
>>>
>>> After rebuilding avr-libc with respective compiler version, did
>>> respectively:
>>>
>>> $ find . -name 'lib[mc].a' -exec avr-size {} ';' > size-orig.txt
>>> $ find . -name 'lib[mc].a' -exec avr-size {} ';' > size-patch.txt
>>>
>>> and then
>>>
>>> $ diff -U 0 size-orig.txt size-patch.txt > size.diff
>>>
>>> As far as I can see, there is not a big gain but no object increases in
>>> size.
>>>
>>> For some files like ./avr/lib/avr2/libc.a:dtoa_prf.o size gain is 3%.
>>> For ./avr/lib/avr4/libc.a:vfprintf_std.o it's 1.7% and for others just one
>>> instruction better.
>> Actually there are some cases where the size increases by one instruction:
>>
>>> -    464              0       0     464     1d0 realloc.o (ex
>>> ./avr/lib/avr31/libc.a)
>>> +    466              0       0     466     1d2 realloc.o (ex
>>> ./avr/lib/avr31/libc.a)
>>> -    464              0       0     464     1d0 realloc.o (ex
>>> ./avr/lib/avr3/libc.a)
>>> +    466              0       0     466     1d2 realloc.o (ex
>>> ./avr/lib/avr3/libc.a)
>> Will have a look tomorrow; presumably it's adding +/-1 that force a scratch
>> whilst the old code does not.

Is because different spill generation. Both cases do spilling (same frame size)
but a bit different. If there was a MOVW instruction the code would be smaller,
so I think it's a rare corner case.

> However results are good.
> The patch can be committed.
> 
> Denis.

Applied with the proposed can_create_pseudo_p()

Johann

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2011-10-19 15:04 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-18 12:22 [Patch,AVR]: PR50447: Tweak addhi3 Georg-Johann Lay
2011-10-18 12:39 ` Denis Chertykov
2011-10-18 13:51   ` Georg-Johann Lay
2011-10-18 14:23     ` Denis Chertykov
2011-10-18 17:37       ` Georg-Johann Lay
2011-10-18 18:25         ` Denis Chertykov
2011-10-18 18:28           ` Denis Chertykov
2011-10-18 19:59           ` Georg-Johann Lay
2011-10-18 22:52             ` Georg-Johann Lay
2011-10-19 11:59               ` Denis Chertykov
2011-10-19 15:24                 ` Georg-Johann Lay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).