public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [committed] i386: Improve access to _Atomic DImode location via XMM regs for SSE4.1 x86_32 targets
@ 2024-05-28 21:02 Uros Bizjak
  0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2024-05-28 21:02 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 942 bytes --]

Use MOVD/PEXTRD and MOVD/PINSRD insn sequences to move a DImode value
between XMM and GPR register sets for SSE4.1 x86_32 targets in order
to avoid spilling the value to the stack.

The load from _Atomic location a improves from:

    movq    a, %xmm0
    movq    %xmm0, (%esp)
    movl    (%esp), %eax
    movl    4(%esp), %edx

to:
    movq    a, %xmm0
    movd    %xmm0, %eax
    pextrd  $1, %xmm0, %edx

The store to _Atomic location b improves from:

    movl    %eax, (%esp)
    movl    %edx, 4(%esp)
    movq    (%esp), %xmm0
    movq    %xmm0, b

to:
    movd    %eax, %xmm0
    pinsrd  $1, %edx, %xmm0
    movq    %xmm0, b

gcc/ChangeLog:

    * config/i386/sync.md (atomic_loaddi_fpu): Use movd/pextrd
    to move DImode value from XMM to GPR for TARGET_SSE4_1.
    (atomic_storedi_fpu): Use movd/pinsrd to move DImode value
    from GPR to XMM for TARGET_SSE4_1.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 1409 bytes --]

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 8317581ebe2..f2b3ba0aa7a 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -215,8 +215,18 @@ (define_insn_and_split "atomic_loaddi_fpu"
 	}
       else
 	{
+	  rtx tmpdi = gen_lowpart (DImode, tmp);
+
 	  emit_insn (gen_loaddi_via_sse (tmp, src));
-	  emit_insn (gen_storedi_via_sse (mem, tmp));
+
+	  if (GENERAL_REG_P (dst)
+	      && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC)
+	    {
+	      emit_move_insn (dst, tmpdi);
+	      DONE;
+	    }
+	  else
+	    emit_move_insn (mem, tmpdi);
 	}
 
       if (mem != dst)
@@ -294,20 +304,30 @@ (define_insn_and_split "atomic_storedi_fpu"
     emit_move_insn (dst, src);
   else
     {
-      if (REG_P (src))
-	{
-	  emit_move_insn (mem, src);
-	  src = mem;
-	}
-
       if (STACK_REG_P (tmp))
 	{
+	  if (GENERAL_REG_P (src))
+	    {
+	      emit_move_insn (mem, src);
+	      src = mem;
+	    }
+
 	  emit_insn (gen_loaddi_via_fpu (tmp, src));
 	  emit_insn (gen_storedi_via_fpu (dst, tmp));
 	}
       else
 	{
-	  emit_insn (gen_loaddi_via_sse (tmp, src));
+	  rtx tmpdi = gen_lowpart (DImode, tmp);
+
+	  if (GENERAL_REG_P (src)
+	      && !(TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC))
+	    {
+	      emit_move_insn (mem, src);
+	      src = mem;
+	    }
+
+	  emit_move_insn (tmpdi, src);
+
 	  emit_insn (gen_storedi_via_sse (dst, tmp));
 	}
     }

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-05-28 21:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-28 21:02 [committed] i386: Improve access to _Atomic DImode location via XMM regs for SSE4.1 x86_32 targets Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).