* [x86_64 PATCH] Add rotl64ti2_doubleword pattern to i386.md
@ 2022-07-29  6:10 Roger Sayle
  2022-07-31 17:31 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: Roger Sayle @ 2022-07-29  6:10 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2035 bytes --]


This patch adds rot[lr]64ti2_doubleword patterns to the x86_64 backend,
to move splitting of 128-bit TImode rotates by 64 bits after reload,
matching what we now do for 64-bit DImode rotations by 32 bits with -m32.
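
At the machine level, a TImode rotate by 64 bits simply exchanges the
two DImode halves of the value.  As a rough sketch (an illustration
only, not part of the patch), the operation the new post-reload
splitter performs is equivalent to:

/* Hypothetical helper, for illustration: on x86_64 a 128-bit rotate
   by 64 just swaps the low and high 64-bit halves, so it can be
   emitted as two 64-bit moves, or a single xchg when source and
   destination are the same register pair.  */
static void
rot64ti2_doubleword_sketch (unsigned long long *lo, unsigned long long *hi)
{
  unsigned long long tmp = *lo;
  *lo = *hi;
  *hi = tmp;
}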

In theory, moving the point at which this rotation is split should have
little influence on code generation, but in practice "reload" sometimes
decides to make use of the increased flexibility to reduce the number
of registers used, and the code size, by using xchg.

For example:
__int128 x;
__int128 y;
__int128 a;
__int128 b;

void foo()
{
    unsigned __int128 t = x;
    t ^= a;
    t = (t<<64) | (t>>64);
    t ^= b;
    y = t;
}

Before:
        movq    x(%rip), %rsi
        movq    x+8(%rip), %rdi
        xorq    a(%rip), %rsi
        xorq    a+8(%rip), %rdi
        movq    %rdi, %rax
        movq    %rsi, %rdx
        xorq    b(%rip), %rax
        xorq    b+8(%rip), %rdx
        movq    %rax, y(%rip)
        movq    %rdx, y+8(%rip)
        ret

After:
        movq    x(%rip), %rax
        movq    x+8(%rip), %rdx
        xorq    a(%rip), %rax
        xorq    a+8(%rip), %rdx
        xchgq   %rdx, %rax
        xorq    b(%rip), %rax
        xorq    b+8(%rip), %rdx
        movq    %rax, y(%rip)
        movq    %rdx, y+8(%rip)
        ret

On some modern architectures this is a small win; on some older
architectures it is a small loss.  The decision of which code to
generate is made in "reload", and could probably be tweaked by
register preferencing.  The much bigger win is that (eventually) all
TImode shifts and rotates by constants will become potential
candidates for TImode STV.
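
For reference, here is a rough sketch (an illustration only, not
current compiler output) of what an STV-converted foo() might look
like, written with SSE intrinsics and using the same globals as above,
where the rotate by 64 becomes a pshufd that swaps the two quadwords:

#include <immintrin.h>

void foo_stv (void)                 /* hypothetical name */
{
  __m128i t = _mm_loadu_si128 ((const __m128i *) &x);
  t = _mm_xor_si128 (t, _mm_loadu_si128 ((const __m128i *) &a));
  t = _mm_shuffle_epi32 (t, 0x4e);  /* swap 64-bit halves: rotate by 64 */
  t = _mm_xor_si128 (t, _mm_loadu_si128 ((const __m128i *) &b));
  _mm_storeu_si128 ((__m128i *) &y, t);
}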

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check with no new failures.  Ok for mainline?


2022-07-29  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386.md (define_expand <any_rotate>ti3): For
        rotations by 64 bits use new rot[lr]64ti2_doubleword pattern.
        (rot[lr]64ti2_doubleword): New post-reload splitter.


Thanks again,
Roger
--


[-- Attachment #2: patchrt.txt --]
[-- Type: text/plain, Size: 1276 bytes --]

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fab6aed..f1158e1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -13820,6 +13820,8 @@
   if (const_1_to_63_operand (operands[2], VOIDmode))
     emit_insn (gen_ix86_<insn>ti3_doubleword
 		(operands[0], operands[1], operands[2]));
+  else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
+    emit_insn (gen_<insn>64ti2_doubleword (operands[0], operands[1]));
   else
     {
       rtx amount = force_reg (QImode, operands[2]);
@@ -14045,6 +14047,24 @@
     }
 })
 
+(define_insn_and_split "<insn>64ti2_doubleword"
+ [(set (match_operand:TI 0 "register_operand" "=r,r,r")
+       (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o")
+                      (const_int 64)))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 3))
+  (set (match_dup 2) (match_dup 1))]
+{
+  split_double_mode (TImode, &operands[0], 2, &operands[0], &operands[2]);
+  if (rtx_equal_p (operands[0], operands[1]))
+    {
+      emit_insn (gen_swapdi (operands[0], operands[2]));
+      DONE;
+    }
+})
+
 (define_mode_attr rorx_immediate_operand
 	[(SI "const_0_to_31_operand")
 	 (DI "const_0_to_63_operand")])


* Re: [x86_64 PATCH] Add rotl64ti2_doubleword pattern to i386.md
  2022-07-29  6:10 [x86_64 PATCH] Add rotl64ti2_doubleword pattern to i386.md Roger Sayle
@ 2022-07-31 17:31 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2022-07-31 17:31 UTC (permalink / raw)
  To: Roger Sayle; +Cc: gcc-patches

On Fri, Jul 29, 2022 at 8:10 AM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> This patch adds rot[lr]64ti2_doubleword patterns to the x86_64 backend,
> to move splitting of 128-bit TImode rotates by 64 bits after reload,
> matching what we now do for 64-bit DImode rotations by 32 bits with -m32.
>
> In theory, moving the point at which this rotation is split should have
> little influence on code generation, but in practice "reload" sometimes
> decides to make use of the increased flexibility to reduce the number
> of registers used, and the code size, by using xchg.
>
> For example:
> __int128 x;
> __int128 y;
> __int128 a;
> __int128 b;
>
> void foo()
> {
>     unsigned __int128 t = x;
>     t ^= a;
>     t = (t<<64) | (t>>64);
>     t ^= b;
>     y = t;
> }
>
> Before:
>         movq    x(%rip), %rsi
>         movq    x+8(%rip), %rdi
>         xorq    a(%rip), %rsi
>         xorq    a+8(%rip), %rdi
>         movq    %rdi, %rax
>         movq    %rsi, %rdx
>         xorq    b(%rip), %rax
>         xorq    b+8(%rip), %rdx
>         movq    %rax, y(%rip)
>         movq    %rdx, y+8(%rip)
>         ret
>
> After:
>         movq    x(%rip), %rax
>         movq    x+8(%rip), %rdx
>         xorq    a(%rip), %rax
>         xorq    a+8(%rip), %rdx
>         xchgq   %rdx, %rax
>         xorq    b(%rip), %rax
>         xorq    b+8(%rip), %rdx
>         movq    %rax, y(%rip)
>         movq    %rdx, y+8(%rip)
>         ret
>
> On some modern architectures this is a small win; on some older
> architectures it is a small loss.  The decision of which code to
> generate is made in "reload", and could probably be tweaked by
> register preferencing.  The much bigger win is that (eventually) all
> TImode shifts and rotates by constants will become potential
> candidates for TImode STV.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check with no new failures.  Ok for mainline?
>
>
> 2022-07-29  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386.md (define_expand <any_rotate>ti3): For
>         rotations by 64 bits use new rot[lr]64ti2_doubleword pattern.
>         (rot[lr]64ti2_doubleword): New post-reload splitter.

OK.

Thanks,
Uros.

