public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
From: Uros Bizjak <ubizjak@gmail.com>
To: Ulrich Weigand <uweigand@de.ibm.com>
Cc: gcc-patches@gcc.gnu.org, GCC Development <gcc@gcc.gnu.org>,
		"H.J. Lu" <hjl.tools@gmail.com>
Subject: Re: [RFC PATCH, i386]: Allow zero_extended addresses (+ problems with reload and offsetable address, "o" constraint)
Date: Mon, 08 Aug 2011 17:12:00 -0000	[thread overview]
Message-ID: <CAFULd4Y5mie9zBP5TbvFpuDxSwbKpp=n3sh8iyr3UpSfiqiYxQ@mail.gmail.com> (raw)
In-Reply-To: <201108081530.p78FUAgM029764@d06av02.portsmouth.uk.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 2145 bytes --]

On Mon, Aug 8, 2011 at 5:30 PM, Ulrich Weigand <uweigand@de.ibm.com> wrote:
> Uros Bizjak wrote:
>
>> Although, it would be nice for reload to subsequently fix CSE'd
>> non-offsetable memory by copying address to temporary reg (*as said in
>> the documentation*), we could simply require an XMM temporary for
>> TImode reloads to/from integer registers, and this fixes ICE for x32.
>
> Moves are special as far as reload is concerned.  If there is already
> a move instruction present *before* reload, it will get fixed up
> according to its constraints as any other instruction.
>
> However, reload will *introduce* new moves as part of its operation,
> and those will *not* themselves get reloaded.  Instead, reload simply
> assumes that every plain move will just succeed without requiring
> any reload; if this is not true, the target *must* provide a
> secondary reload for this move.
>
> (Note that the secondary reload could also work by reloading the
> target address into a temporary; that's up to the target to
> implement.)

Whoa, indeed.

Using the attached patch, which reloads the memory address instead of
going through an XMM register, the code for the testcase improves from:

test:
.LFB0:
	.cfi_startproc
	pushq	%rbx
	.cfi_def_cfa_offset 16
	.cfi_offset 3, -16
	sall	$4, %esi
	addl	%edi, %esi
	movdqa	(%esi), %xmm0
	movdqa	%xmm0, -16(%rsp)
	movq	-16(%rsp), %rcx
	movq	-8(%rsp), %rbx
	addq	$1, %rcx
	adcq	$0, %rbx
	movq	%rcx, -16(%rsp)
	sall	$4, %edx
	movq	%rbx, -8(%rsp)
	movdqa	-16(%rsp), %xmm0
	movdqa	%xmm0, (%esi)
	pxor	%xmm0, %xmm0
	movdqa	%xmm0, (%edx,%esi)
	popq	%rbx
	.cfi_def_cfa_offset 8
	ret

to:

test:
.LFB0:
	.cfi_startproc
	sall	$4, %esi
	pushq	%rbx
	.cfi_def_cfa_offset 16
	.cfi_offset 3, -16
	addl	%edi, %esi
	pxor	%xmm0, %xmm0
	mov	%esi, %eax
	movq	(%rax), %rcx
	movq	8(%rax), %rbx
	addq	$1, %rcx
	adcq	$0, %rbx
	sall	$4, %edx
	movq	%rcx, (%rax)
	movq	%rbx, 8(%rax)
	movdqa	%xmm0, (%edx,%esi)
	popq	%rbx
	.cfi_def_cfa_offset 8
	ret

H.J., can you please test the attached patch? This optimization won't
trigger on x86_64 anymore.

Thanks,
Uros.

[-- Attachment #2: z.diff.txt --]
[-- Type: text/plain, Size: 2657 bytes --]

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 177565)
+++ config/i386/i386.md	(working copy)
@@ -2073,6 +2073,40 @@
        (const_string "orig")))
    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,TI,DI,TI,DI,DI,DI,DI,DI")])
 
+;; Reload patterns to support multi-word load/store
+;; with non-offsetable address.
+(define_expand "reload_noff_store"
+  [(parallel [(match_operand 0 "memory_operand" "=m")
+              (match_operand 1 "register_operand" "r")
+              (match_operand:DI 2 "register_operand" "=&r")])]
+  "TARGET_64BIT"
+{
+  rtx mem = operands[0];
+  rtx addr = XEXP (mem, 0);
+
+  emit_move_insn (operands[2], addr);
+  mem = replace_equiv_address_nv (mem, operands[2]);
+
+  emit_insn (gen_rtx_SET (VOIDmode, mem, operands[1]));
+  DONE;
+})
+
+(define_expand "reload_noff_load"
+  [(parallel [(match_operand 0 "register_operand" "=r")
+              (match_operand 1 "memory_operand" "m")
+              (match_operand:DI 2 "register_operand" "=r")])]
+  "TARGET_64BIT"
+{
+  rtx mem = operands[1];
+  rtx addr = XEXP (mem, 0);
+
+  emit_move_insn (operands[2], addr);
+  mem = replace_equiv_address_nv (mem, operands[2]);
+
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], mem));
+  DONE;
+})
+
 ;; Convert impossible stores of immediate to existing instructions.
 ;; First try to get scratch register and go through it.  In case this
 ;; fails, move by 32bit parts.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 177566)
+++ config/i386/i386.c	(working copy)
@@ -28245,18 +28245,25 @@
 
 static reg_class_t
 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
-		       enum machine_mode mode,
-		       secondary_reload_info *sri ATTRIBUTE_UNUSED)
+		       enum machine_mode mode, secondary_reload_info *sri)
 {
   /* Double-word spills from general registers to non-offsettable memory
-     references (zero-extended addresses) go through XMM register.  */
+     references (zero-extended addresses) require special handling.  */
   if (TARGET_64BIT
       && MEM_P (x)
       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
       && rclass == GENERAL_REGS
       && !offsettable_memref_p (x))
-    return SSE_REGS;
+    {
+      sri->icode = (in_p
+		    ? CODE_FOR_reload_noff_load
+		    : CODE_FOR_reload_noff_store);
+      /* Add the cost of move to a temporary.  */
+      sri->extra_cost = 1;
 
+      return NO_REGS;
+    }
+
   /* QImode spills from non-QI registers require
      intermediate register on 32bit targets.  */
   if (!TARGET_64BIT

  reply	other threads:[~2011-08-08 17:12 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-08-05 18:51 Uros Bizjak
2011-08-07 12:39 ` Uros Bizjak
2011-08-08 15:30   ` Ulrich Weigand
2011-08-08 17:12     ` Uros Bizjak [this message]
2011-08-08 17:14       ` H.J. Lu
2011-08-09  7:41       ` Uros Bizjak
2011-08-09 15:40         ` H.J. Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAFULd4Y5mie9zBP5TbvFpuDxSwbKpp=n3sh8iyr3UpSfiqiYxQ@mail.gmail.com' \
    --to=ubizjak@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=gcc@gcc.gnu.org \
    --cc=hjl.tools@gmail.com \
    --cc=uweigand@de.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).