public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* PR/23303 (not using leal on ix86)
@ 2005-08-22 11:23 Paolo Bonzini
  0 siblings, 0 replies; only message in thread
From: Paolo Bonzini @ 2005-08-22 11:23 UTC (permalink / raw)
  To: gcc

It looks like Jan's patch at 
http://gcc.gnu.org/ml/gcc-patches/2005-07/msg02021.html is causing a 
code size regression in i686.  I'll consider a 32-bit target, and this 
testcase

   char **
   VTallocbuf(char **allbuf, unsigned long savelines)
   {
     return &allbuf[savelines];
   }

For i686 we produce

         movl    8(%esp), %eax
         movl    4(%esp), %edx
         sall    $2, %eax
         addl    %edx, %eax

instead of

         movl    8(%esp), %eax
         movl    4(%esp), %edx
         leal    (%edx,%eax,4), %eax

However even in this case a no-lea code could be feasible:

         movl    8(%esp), %eax
         sall    $2, %eax
         addl    4(%esp), %eax

And this is exactly the rtl we have until peephole2, where this peephole 
splits the instruction:

;; Don't do logical operations with memory inputs.
(define_peephole2
   [(match_scratch:SI 2 "r")
    (parallel [(set (match_operand:SI 0 "register_operand" "")
                    (match_operator:SI 3 "arith_or_logical_operator"
                      [(match_dup 0)
                       (match_operand:SI 1 "memory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
   "! optimize_size && ! TARGET_READ_MODIFY"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
               (clobber (reg:CC FLAGS_REG))])]
   "")

I think that Jan's patch should be conditionalized: if !optimize_size && 
!TARGET_READ_MODIFY, the transformation he removed will be done anyway, 
and too late in the game.

Let's see what the hunks do...

Index: expr.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expr.c,v
retrieving revision 1.806
diff -c -3 -p -r1.806 expr.c
*** expr.c	25 Jul 2005 12:04:45 -0000	1.806
--- expr.c	29 Jul 2005 12:10:40 -0000
*************** expand_expr_real_1 (tree exp, rtx target
*** 6578,6595 ****
         target = 0;
       }

-   /* If will do cse, generate all results into pseudo registers
-      since 1) that allows cse to find more things
-      and 2) otherwise cse could produce an insn the machine
-      cannot support.  An exception is a CONSTRUCTOR into a multi-word
-      MEM: that's much more likely to be most efficient into the MEM.
-      Another is a CALL_EXPR which must return in memory.  */
-
-   if (! cse_not_expected && mode != BLKmode && target
-       && (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
-       && ! (code == CONSTRUCTOR && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
-       && ! (code == CALL_EXPR && aggregate_value_p (exp, exp)))
-     target = 0;

     switch (code)
       {

I think this ought to be left in.  Not because CSE can find more things, 
but because in general an instruction selection pass ought to recombine 
MEMs at their usage points if it is worthwhile.  To this end, 
ix86_rtx_costs could be taught about TARGET_READ_MODIFY, with something 
like:

    case MEM:
      if (!optimize && !TARGET_READ_MODIFY
          && GET_RTX_CLASS (outer_code) == RTX_BIN_ARITH)
        *total++;
      break;

Also consider that we still lack tree-based load PRE (PR23455), so we 
still need CSE to "find more things".  Load PRE is the major obstacle 
towards removing CSE path following.

--- 6578,6583 ----
Index: optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.287
diff -c -3 -p -r1.287 optabs.c
*** optabs.c	12 Jul 2005 09:20:02 -0000	1.287
--- optabs.c	29 Jul 2005 12:10:41 -0000
*************** expand_vec_cond_expr (tree vec_cond_expr
*** 5475,5481 ****
     if (icode == CODE_FOR_nothing)
       return 0;

!   if (!target)
       target = gen_reg_rtx (mode);

     /* Get comparison rtx.  First expand both cond expr operands.  */
--- 5475,5481 ----
     if (icode == CODE_FOR_nothing)
       return 0;

!   if (!target || !insn_data[icode].operand[0].predicate (target, mode))
       target = gen_reg_rtx (mode);

     /* Get comparison rtx.  First expand both cond expr operands.  */

This is partially unrelated, it does not hurt.

Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.843
diff -c -3 -p -r1.843 i386.c
*** config/i386/i386.c	18 Jul 2005 06:39:18 -0000	1.843
--- config/i386/i386.c	29 Jul 2005 12:10:42 -0000
*************** ix86_fixup_binary_operands (enum rtx_cod
*** 8154,8170 ****
         && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
       src1 = force_reg (mode, src1);

-   /* If optimizing, copy to regs to improve CSE */
-   if (optimize && ! no_new_pseudos)
-     {
-       if (GET_CODE (dst) == MEM)
- 	dst = gen_reg_rtx (mode);
-       if (GET_CODE (src1) == MEM)
- 	src1 = force_reg (mode, src1);
-       if (GET_CODE (src2) == MEM)
- 	src2 = force_reg (mode, src2);
-     }
-
     src1 = operands[1] = src1;
     src2 = operands[2] = src2;
     return dst;

This should be reinstated and conditionalized as

    if (optimize && !optimize_size && !no_new_pseudos
        && !TARGET_READ_MODIFY)

and likewise in ix86_expand_unary_operator.

*************** ix86_expand_fp_absneg_operator (enum rtx
*** 8410,8416 ****
     matching_memory = false;
     if (MEM_P (dst))
       {
!       if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
   	matching_memory = true;
         else
   	dst = gen_reg_rtx (mode);
--- 8626,8632 ----
     matching_memory = false;
     if (MEM_P (dst))
       {
!       if (rtx_equal_p (dst, src))
   	matching_memory = true;
         else
   	dst = gen_reg_rtx (mode);

This should become something like

    if (!rtx_equal_p (dst, src)
        || (optimize && !optimize_size && !no_new_pseudos
            && !TARGET_READ_MODIFY))
      dst = gen_reg_rtx (mode);
    else
      matching_memory = true;

Reversing the if allows to write the same test as above.

Does this make sense?

Paolo

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2005-08-22 11:23 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-08-22 11:23 PR/23303 (not using leal on ix86) Paolo Bonzini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).