public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* PATCH [11/n]: Prepare x32: PR rtl-optimization/48155: Reload doesn't handle subreg properly
@ 2011-06-25 19:21 H.J. Lu
  2011-06-27 15:04 ` Ulrich Weigand
  0 siblings, 1 reply; 5+ messages in thread
From: H.J. Lu @ 2011-06-25 19:21 UTC (permalink / raw)
  To: gcc-patches, uweigand, bernds

Hi,

This is the last target independent patch for x32.  I will start
submitting x86 specific patches in a week.

Given input:

(plus:SI (subreg:SI (plus:DI (reg/f:DI 7 sp)
            (const_int 16 [0x10])) 0)
    (const_int -1 [0xffffffffffffffff]))

reloads tries to add

(subreg:SI (plus:DI (reg/f:DI 7 sp)
        (const_int 16 [0x10])) 0)

to

(reg:SI 1 dx)

This code:

 /* How to do this reload can get quite tricky.  Normally, we are being
     asked to reload a simple operand, such as a MEM, a constant, or a
pseudo
     register that didn't get a hard register.  In that case we can just
     call emit_move_insn.

     We can also be asked to reload a PLUS that adds a register or a MEM
to
     another register, constant or MEM.  This can occur during frame
pointer
     elimination and while reloading addresses.  This case is handled by
     trying to emit a single insn to perform the add.  If it is not
valid,
     we use a two insn sequence.

     Or we can be asked to reload an unary operand that was a fragment
of
     an addressing mode, into a register.  If it isn't recognized as-is,
     we try making the unop operand and the reload-register the same:
     (set reg:X (unop:X expr:Y))
     -> (set reg:Y expr:Y) (set reg:X (unop:X reg:Y)).

     Finally, we could be called to handle an 'o' constraint by putting
     an address into a register.  In that case, we first try to do this
     with a named pattern of "reload_load_address".  If no such pattern
     exists, we just emit a SET insn and hope for the best (it will
normally
     be valid on machines that use 'o').

     This entire process is made complex because reload will never
     process the insns we generate here and so we must ensure that
     they will fit their constraints and also by the fact that parts of
     IN might be being reloaded separately and replaced with spill
registers.
     Because of this, we are, in some sense, just guessing the right
approach
     here.  The one listed above seems to work.

     ??? At some point, this whole thing needs to be rethought.  */

  if (GET_CODE (in) == PLUS
      && (REG_P (XEXP (in, 0))
          || GET_CODE (XEXP (in, 0)) == SUBREG
          || MEM_P (XEXP (in, 0)))
      && (REG_P (XEXP (in, 1))
          || GET_CODE (XEXP (in, 1)) == SUBREG
          || CONSTANT_P (XEXP (in, 1))
          || MEM_P (XEXP (in, 1))))

doesn't check if XEXP (in, 0/1) is a SUBREG of REG.

This patch adds a new function, reload_plus_ok, to check this condition.
OK for trunk?

Thanks.


H.J.
---
2011-06-25  H.J. Lu  <hongjiu.lu@intel.com>

	PR rtl-optimization/48155
	* reload1.c (reload_plus_ok): New.
	(gen_reload_chain_without_interm_reg_p): Use it.
	(gen_reload): Likewise.

diff --git a/gcc/reload1.c b/gcc/reload1.c
index e65503b..1864ae6 100644
--- a/gcc/reload1.c
+++ b/gcc/reload1.c
@@ -5544,6 +5544,54 @@ substitute (rtx *where, const_rtx what, rtx repl)
     }
 }
 
+/* Return TRUE if IN is a valid plus operation.  */
+
+static bool
+reload_plus_ok (rtx in)
+{
+  if (GET_CODE (in) == PLUS)
+    {
+      rtx op0 = XEXP (in, 0);
+      rtx op1 = XEXP (in, 1);
+      if ((REG_P (op0)
+	   || GET_CODE (op0) == SUBREG
+	   || MEM_P (op0))
+	  && (REG_P (op1)
+	      || GET_CODE (op1) == SUBREG
+	      || CONSTANT_P (op1)
+	      || MEM_P (op1)))
+	{
+	  rtx subreg, other;
+	  if (GET_CODE (op0) == SUBREG)
+	    {
+	      subreg = SUBREG_REG (op0);
+	      other = op1;
+	    }
+	  else if (GET_CODE (op1) == SUBREG)
+	    {
+	      subreg = SUBREG_REG (op1);
+	      other = op0;
+	    }
+	  else
+	    return true;
+
+	  /* Avoid
+	     (plus (subreg (plus (reg)
+				 (const_int NNN)))
+		   (const_int NNN))
+	   */
+	  if (GET_CODE (subreg) == PLUS
+	      && (CONSTANT_P (XEXP (subreg, 0))
+		  || CONSTANT_P (XEXP (subreg, 1)))
+	      && CONSTANT_P (other))
+	    return false;
+
+	  return true;
+	}
+    }
+  return false;
+}
+
 /* The function returns TRUE if chain of reload R1 and R2 (in any
    order) can be evaluated without usage of intermediate register for
    the reload containing another reload.  It is important to see
@@ -5596,14 +5644,7 @@ gen_reload_chain_without_interm_reg_p (int r1, int r2)
      opposite SUBREG on OUT.  Likewise for a paradoxical SUBREG on OUT.  */
   strip_paradoxical_subreg (&in, &out);
 
-  if (GET_CODE (in) == PLUS
-      && (REG_P (XEXP (in, 0))
-	  || GET_CODE (XEXP (in, 0)) == SUBREG
-	  || MEM_P (XEXP (in, 0)))
-      && (REG_P (XEXP (in, 1))
-	  || GET_CODE (XEXP (in, 1)) == SUBREG
-	  || CONSTANT_P (XEXP (in, 1))
-	  || MEM_P (XEXP (in, 1))))
+  if (reload_plus_ok (in))
     {
       insn = emit_insn (gen_rtx_SET (VOIDmode, out, in));
       code = recog_memoized (insn);
@@ -8449,14 +8490,7 @@ gen_reload (rtx out, rtx in, int opnum, enum reload_type type)
 
      ??? At some point, this whole thing needs to be rethought.  */
 
-  if (GET_CODE (in) == PLUS
-      && (REG_P (XEXP (in, 0))
-	  || GET_CODE (XEXP (in, 0)) == SUBREG
-	  || MEM_P (XEXP (in, 0)))
-      && (REG_P (XEXP (in, 1))
-	  || GET_CODE (XEXP (in, 1)) == SUBREG
-	  || CONSTANT_P (XEXP (in, 1))
-	  || MEM_P (XEXP (in, 1))))
+  if (reload_plus_ok (in))
     {
       /* We need to compute the sum of a register or a MEM and another
 	 register, constant, or MEM, and put it into the reload

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: PATCH [11/n]: Prepare x32: PR rtl-optimization/48155: Reload doesn't handle subreg properly
  2011-06-25 19:21 PATCH [11/n]: Prepare x32: PR rtl-optimization/48155: Reload doesn't handle subreg properly H.J. Lu
@ 2011-06-27 15:04 ` Ulrich Weigand
  2011-06-27 16:16   ` H.J. Lu
  0 siblings, 1 reply; 5+ messages in thread
From: Ulrich Weigand @ 2011-06-27 15:04 UTC (permalink / raw)
  To: hjl.tools; +Cc: gcc-patches, bernds

H.J. Lu wrote:

> Given input:
> 
> (plus:SI (subreg:SI (plus:DI (reg/f:DI 7 sp)
>             (const_int 16 [0x10])) 0)
>     (const_int -1 [0xffffffffffffffff]))

Once again, this seems weird as legitimate address ...  If this really
can occur validly, there'll probably need to be an insn+splitter and/or
a secondard reload provided by the back-end to handle it.

> reloads tries to add
> 
> (subreg:SI (plus:DI (reg/f:DI 7 sp)
>         (const_int 16 [0x10])) 0)
> 
> to
> 
> (reg:SI 1 dx)

And what happens then?  If the only problem is that this is then
rejected by the back-end, I don't think we need to change anything
in gen_reload ...

With your change below, it seems you're just falling through to
the generic gen_rtx_SET case, right?   How does this help?

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  Ulrich.Weigand@de.ibm.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: PATCH [11/n]: Prepare x32: PR rtl-optimization/48155: Reload doesn't handle subreg properly
  2011-06-27 15:04 ` Ulrich Weigand
@ 2011-06-27 16:16   ` H.J. Lu
  2011-06-27 17:06     ` Ulrich Weigand
  0 siblings, 1 reply; 5+ messages in thread
From: H.J. Lu @ 2011-06-27 16:16 UTC (permalink / raw)
  To: Ulrich Weigand; +Cc: gcc-patches, bernds

On Mon, Jun 27, 2011 at 7:52 AM, Ulrich Weigand <uweigand@de.ibm.com> wrote:
> H.J. Lu wrote:
>
>> Given input:
>>
>> (plus:SI (subreg:SI (plus:DI (reg/f:DI 7 sp)
>>             (const_int 16 [0x10])) 0)
>>     (const_int -1 [0xffffffffffffffff]))
>
> Once again, this seems weird as legitimate address ...  If this really
> can occur validly, there'll probably need to be an insn+splitter and/or
> a secondard reload provided by the back-end to handle it.

This is the valid memory address for any instructions which
take a memory operand under x32.  How will insn+splitter and/or
a secondard reload help x32 here? Do I implement such a thing for
all instructions which take a memory operand?

>> reloads tries to add
>>
>> (subreg:SI (plus:DI (reg/f:DI 7 sp)
>>         (const_int 16 [0x10])) 0)
>>
>> to
>>
>> (reg:SI 1 dx)
>
> And what happens then?  If the only problem is that this is then
> rejected by the back-end, I don't think we need to change anything
> in gen_reload ...
>
> With your change below, it seems you're just falling through to
> the generic gen_rtx_SET case, right?   How does this help?
>

I added ix86_simplify_base_disp to i386.c to handle such cases.
It translates

(set (reg:SI 40 r11)
        (plus:SI (plus:SI (mult:SI (reg:SI 1 dx)
                                   (const_int 8))
                          (subreg:SI (plus:DI (reg/f:DI 7 sp)
                                              (const_int CONST1)) 0))
                 (const_int CONST2)))

  into

   (set (reg:SI 40 r11)
        (plus:SI (plus:SI (mult:SI (reg:SI 1 dx)
                                   (const_int 8))
                          (reg/f:SI 7 sp))
                 (const_int [CONST1 + CONST2])))

   It also translates

   (plus:DI (zero_extend:DI (plus:SI (plus:SI (reg:SI 4 si [70])
                                              (reg:SI 2 cx [86]))
                                     (const_int CONST1)))
            (const_int CONST2))

   into

   (plus:DI (zero_extend:DI (plus:SI (reg:SI 4 si [70])
                                     (reg:SI 2 cx [86]))
            (const_int [CONST1 + CONST2])))

  It also translates

   (plus:SI (plus:SI (plus:SI (reg:SI 4 si [70])
                              (reg:SI 2 cx [86]))
                     (symbol_ref:SI ("A.193.2210")))
            (const_int CONST))

   into

   (plus:SI (plus:SI (reg:SI 4 si [70])
                     (reg:SI 2 cx [86]))
            (const (plus:SI (symbol_ref:SI ("A.193.2210"))
                            (const_int CONST))))

   It aslo translates

   (set (reg:SI 40 r11)
        (plus:SI (plus:SI (reg:SI 1 dx)
                          (subreg:SI (plus:DI (reg/f:DI 7 sp)
                                              (const_int CONST1)) 0))
                 (const_int CONST2)))

   into

   (set (reg:SI 40 r11)
        (plus:SI (plus:SI (reg:SI 1 dx)
                          (reg/f:SI 7 sp))
                 (const_int [CONST1 + CONST2])))


-- 
H.J.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: PATCH [11/n]: Prepare x32: PR rtl-optimization/48155: Reload doesn't handle subreg properly
  2011-06-27 16:16   ` H.J. Lu
@ 2011-06-27 17:06     ` Ulrich Weigand
  2011-06-28 14:24       ` H.J. Lu
  0 siblings, 1 reply; 5+ messages in thread
From: Ulrich Weigand @ 2011-06-27 17:06 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches, bernds

H.J. Lu wrote:
> On Mon, Jun 27, 2011 at 7:52 AM, Ulrich Weigand <uweigand@de.ibm.com> wrote:
> > H.J. Lu wrote:
> >
> >> Given input:
> >>
> >> (plus:SI (subreg:SI (plus:DI (reg/f:DI 7 sp)
> >> =A0 =A0 =A0 =A0 =A0 =A0 (const_int 16 [0x10])) 0)
> >> =A0 =A0 (const_int -1 [0xffffffffffffffff]))
> >
> > Once again, this seems weird as legitimate address ... =A0If this really
> > can occur validly, there'll probably need to be an insn+splitter and/or
> > a secondard reload provided by the back-end to handle it.
> 
> This is the valid memory address for any instructions which
> take a memory operand under x32.  How will insn+splitter and/or
> a secondard reload help x32 here? Do I implement such a thing for
> all instructions which take a memory operand?

Well, if this *is* already accepted as valid, then I don't understand
why it is getting reloaded in the first place.

> > With your change below, it seems you're just falling through to
> > the generic gen_rtx_SET case, right? =A0 How does this help?
> >
> 
> I added ix86_simplify_base_disp to i386.c to handle such cases.

I see.  It appears that this routine is used within ix86_decompose_address,
which means that:
- addresses containing constructs as above will be accepted as valid
- the simplification will *not* be done in place in the RTL stream,
  but on the fly every time the address is looked at

This explains why just emitting a plain SET is accepted.  But if this
is the case, then the PLUS case in gen_reload should also have worked,
since the first thing it tries is just a plain SET as well:

[snip]
         The simplest approach is to try to generate such an insn and see if it
         is recognized and matches its constraints.  If so, it can be used.
[snip]
      insn = emit_insn_if_valid_for_reload (gen_rtx_SET (VOIDmode, out, in));
      if (insn)
        return insn;

Can you check in your test case why this isn't accepted here?


In general, I'm wondering if it really a good idea to accept
complex non-simplified expressions like the above as valid addresses,
instead of simplifying them directly where they are generated, and
then just accepting the simplified versions.  That simplification
could occur e.g. in a secondary reload for PLUS (in those cases
where we actually require a reload), or in a legitimize_reload_address
handler (in those cases where the address can remain in place).

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  Ulrich.Weigand@de.ibm.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: PATCH [11/n]: Prepare x32: PR rtl-optimization/48155: Reload doesn't handle subreg properly
  2011-06-27 17:06     ` Ulrich Weigand
@ 2011-06-28 14:24       ` H.J. Lu
  0 siblings, 0 replies; 5+ messages in thread
From: H.J. Lu @ 2011-06-28 14:24 UTC (permalink / raw)
  To: Ulrich Weigand; +Cc: gcc-patches, bernds

On Mon, Jun 27, 2011 at 10:03 AM, Ulrich Weigand <uweigand@de.ibm.com> wrote:
> H.J. Lu wrote:
>> On Mon, Jun 27, 2011 at 7:52 AM, Ulrich Weigand <uweigand@de.ibm.com> wrote:
>> > H.J. Lu wrote:
>> >
>> >> Given input:
>> >>
>> >> (plus:SI (subreg:SI (plus:DI (reg/f:DI 7 sp)
>> >> =A0 =A0 =A0 =A0 =A0 =A0 (const_int 16 [0x10])) 0)
>> >> =A0 =A0 (const_int -1 [0xffffffffffffffff]))
>> >
>> > Once again, this seems weird as legitimate address ... =A0If this really
>> > can occur validly, there'll probably need to be an insn+splitter and/or
>> > a secondard reload provided by the back-end to handle it.
>>
>> This is the valid memory address for any instructions which
>> take a memory operand under x32.  How will insn+splitter and/or
>> a secondard reload help x32 here? Do I implement such a thing for
>> all instructions which take a memory operand?
>
> Well, if this *is* already accepted as valid, then I don't understand
> why it is getting reloaded in the first place.
>
>> > With your change below, it seems you're just falling through to
>> > the generic gen_rtx_SET case, right? =A0 How does this help?
>> >
>>
>> I added ix86_simplify_base_disp to i386.c to handle such cases.
>
> I see.  It appears that this routine is used within ix86_decompose_address,
> which means that:
> - addresses containing constructs as above will be accepted as valid
> - the simplification will *not* be done in place in the RTL stream,
>  but on the fly every time the address is looked at
>
> This explains why just emitting a plain SET is accepted.  But if this
> is the case, then the PLUS case in gen_reload should also have worked,
> since the first thing it tries is just a plain SET as well:
>
> [snip]
>         The simplest approach is to try to generate such an insn and see if it
>         is recognized and matches its constraints.  If so, it can be used.
> [snip]
>      insn = emit_insn_if_valid_for_reload (gen_rtx_SET (VOIDmode, out, in));
>      if (insn)
>        return insn;
>
> Can you check in your test case why this isn't accepted here?
>
>
> In general, I'm wondering if it really a good idea to accept
> complex non-simplified expressions like the above as valid addresses,
> instead of simplifying them directly where they are generated, and
> then just accepting the simplified versions.  That simplification
> could occur e.g. in a secondary reload for PLUS (in those cases
> where we actually require a reload), or in a legitimize_reload_address
> handler (in those cases where the address can remain in place).
>

I am testing this patch instead.  It generates better codes.

Thanks.

-- 
H.J.
----
2011-06-28  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/48155
	* config/i386/i386.md (*lea_0_x32): New.
	* config/i386/predicates.md (pointer_register_operand): Likewise.

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index de6679e..20c22c2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5432,6 +5432,34 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])

+;; Used by reload to support addresses with complex expressions.
+(define_insn_and_split "*lea_0_x32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI
+	  (subreg:SI
+	    (plus:DI
+	      (match_operand:DI 1 "pointer_register_operand" "r")
+	      (match_operand:DI 2 "immediate_operand" "n")) 0)
+	  (match_operand:SI 3 "immediate_operand" "n")))]
+  "TARGET_X32 && !can_create_pseudo_p ()"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx pat, src, insn;
+  pat = gen_rtx_PLUS (DImode, operands[1], operands[2]);
+  src = gen_rtx_PLUS (SImode, gen_rtx_SUBREG (SImode, pat, 0),
+		      operands[3]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]));
+  pat = gen_rtx_PLUS (SImode, operands[1], operands[2]);
+  insn = emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  set_unique_reg_note (insn, REG_EQUIV, src);
+  DONE;
+}
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")])
+
 (define_insn "*lea_1"
   [(set (match_operand:P 0 "register_operand" "=r")
 	(match_operand:P 1 "no_seg_address_operand" "p"))]
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 2ceb717..55842c2 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1219,3 +1219,8 @@
 {
   return !TARGET_X32 || !SYMBOLIC_CONST (op);
 })
+
+;; Test for pointer register operand
+(define_predicate "pointer_register_operand"
+  (and (match_code "reg")
+       (match_test "REG_POINTER (op)")))

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-06-28 13:51 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-25 19:21 PATCH [11/n]: Prepare x32: PR rtl-optimization/48155: Reload doesn't handle subreg properly H.J. Lu
2011-06-27 15:04 ` Ulrich Weigand
2011-06-27 16:16   ` H.J. Lu
2011-06-27 17:06     ` Ulrich Weigand
2011-06-28 14:24       ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).