From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <jakub@sourceware.org>
Received: by sourceware.org (Postfix, from userid 2153)
	id 55BC23858D33; Tue, 14 Mar 2023 18:26:17 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 55BC23858D33
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1678818377;
	bh=gG7HFzjSUi005cfhZKdQn/pLUZHk1E299XAdvFwRWkk=;
	h=From:To:Subject:Date:From;
	b=fxq27seBHzjO81A6H+t8nR0V15yTBoiOV/NnWaqH9rJhs+8aZAR4JGgkOt8ZB2ZWt
	 cLL/vpUpipqdkJsKKBqcnSUHI54XOkz5ZNF4yCQgVyAMOYRRPVn282rU51+GepFAE0
	 WR8ku+Igff8K1PX8oNrAx1t+kqp3/eMDlOzkxwWA=
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="utf-8"
From: Jakub Jelinek <jakub@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r13-6669] i386: Fix up split_double_concat [PR109109]
X-Act-Checkin: gcc
X-Git-Author: Jakub Jelinek <jakub@redhat.com>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 423d34f61c43e400f0d5b837fe93c83963b2ecdd
X-Git-Newrev: 42630fadbe248717859d61c0244c821c32b4e52c
Message-Id: <20230314182617.55BC23858D33@sourceware.org>
Date: Tue, 14 Mar 2023 18:26:17 +0000 (GMT)
List-Id: <gcc-cvs.sourceware.org>

https://gcc.gnu.org/g:42630fadbe248717859d61c0244c821c32b4e52c

commit r13-6669-g42630fadbe248717859d61c0244c821c32b4e52c
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue Mar 14 19:20:23 2023 +0100

    i386: Fix up split_double_concat [PR109109]
    
    In my PR107627 change I missed one important case, which causes
    miscompilation of f4 and f6 in the following tests.
    
    Combine matches there *concatsidi3_3 define_insn_and_split (as with all
    other f* functions in those tests), and RA ends up with:
    (insn 11 10 17 2 (set (reg:DI 0 ax [89])
            (ior:DI (ashift:DI (zero_extend:DI (mem:SI (plus:SI (mult:SI (reg:SI 0 ax [94])
                                    (const_int 4 [0x4]))
                                (symbol_ref:SI ("arr") [flags 0x2]  <var_decl 0x7f4e7fe4ccf0 arr>)) [1 arr[ax_6(D)]+0 S4 A32]))
                    (const_int 32 [0x20]))
                (zero_extend:DI (reg:SI 1 dx [95])))) "pr109109-6.c":24:49 681 {*concatsidi3_3}
         (nil))
    split_double_concat turned that into:
            movl    arr(,%eax,4), %edx
            movl    %edx, %eax
    which is incorrect, because the first instruction overwrites the input
    %edx value that should be copied into the output %eax; the two insns
    can't be swapped because the MEM's address uses %eax.
    
    The following patch fixes that case to emit
            movl    arr(,%eax,4), %eax
            xchgl   %edx, %eax
    instead.
    
    2023-03-14  Jakub Jelinek  <jakub@redhat.com>
    
            PR target/109109
            * config/i386/i386-expand.cc (split_double_concat): Fix splitting
            when lo is equal to dhi and hi is a MEM which uses dlo register.
    
            * gcc.target/i386/pr109109-1.c: New test.
            * gcc.target/i386/pr109109-2.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.cc             |  17 ++-
 gcc/testsuite/gcc.target/i386/pr109109-1.c | 139 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr109109-2.c | 175 +++++++++++++++++++++++++++++
 3 files changed, 328 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 6cc8bd5c80c..e89abf2e817 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -197,9 +197,20 @@ split_double_concat (machine_mode mode, rtx dst, rtx lo, rtx hi)
     {
       /* In this case, code below would first emit_move_insn (dlo, lo)
 	 and then emit_move_insn (dhi, hi).  But the former would
-	 invalidate hi's address.  Load into dhi first.  */
-      emit_move_insn (dhi, hi);
-      hi = dhi;
+	 invalidate hi's address.  */
+      if (rtx_equal_p (dhi, lo))
+	{
+	  /* We can't load into dhi first, so load into dlo
+	     first and we'll swap.  */
+	  emit_move_insn (dlo, hi);
+	  hi = dlo;
+	}
+      else
+	{
+	  /* Load into dhi first.  */
+	  emit_move_insn (dhi, hi);
+	  hi = dhi;
+	}
     }
   if (!rtx_equal_p (dlo, hi))
     {
diff --git a/gcc/testsuite/gcc.target/i386/pr109109-1.c b/gcc/testsuite/gcc.target/i386/pr109109-1.c
new file mode 100644
index 00000000000..ab133649f2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109109-1.c
@@ -0,0 +1,139 @@
+/* PR target/109109 */
+/* { dg-do run { target ia32 } } */
+/* { dg-options "-O2" } */
+
+unsigned int arr[64];
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f1 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) arr[ax]) << 32) | ax;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f2 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) arr[dx]) << 32) | ax;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f3 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | ax;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f4 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) arr[ax]) << 32) | dx;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f5 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) arr[dx]) << 32) | dx;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f6 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | dx;
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f7 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+  return (((unsigned long long) arr[ax]) << 32) | cx;
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f8 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+  return (((unsigned long long) arr[dx]) << 32) | cx;
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f9 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+  return (((unsigned long long) ((unsigned int *) (((char *) arr) + ax))[dx]) << 32) | cx;
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f10 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) ax) << 32) | arr[ax];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f11 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) ax) << 32) | arr[dx];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f12 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) ax) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f13 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) dx) << 32) | arr[ax];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f14 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) dx) << 32) | arr[dx];
+}
+
+__attribute__((noipa, regparm (2))) unsigned long long
+f15 (unsigned int ax, unsigned int dx)
+{
+  return (((unsigned long long) dx) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f16 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+  return (((unsigned long long) cx) << 32) | arr[ax];
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f17 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+  return (((unsigned long long) cx) << 32) | arr[dx];
+}
+
+__attribute__((noipa, regparm (3))) unsigned long long
+f18 (unsigned int ax, unsigned int dx, unsigned int cx)
+{
+  return (((unsigned long long) cx) << 32) | ((unsigned int *) (((char *) arr) + ax))[dx];
+}
+
+int
+main ()
+{
+  for (int i = 0; i < 64; i++)
+    arr[i] = 64 + i;
+#define CHECK_EQ(x, y) do { if (x != y) __builtin_abort (); } while (0)
+  CHECK_EQ (f1 (8, 9), 0x4800000008ULL);
+  CHECK_EQ (f2 (8, 9), 0x4900000008ULL);
+  CHECK_EQ (f3 (8, 9), 0x4b00000008ULL);
+  CHECK_EQ (f4 (8, 9), 0x4800000009ULL);
+  CHECK_EQ (f5 (8, 9), 0x4900000009ULL);
+  CHECK_EQ (f6 (8, 9), 0x4b00000009ULL);
+  CHECK_EQ (f7 (8, 9, 10), 0x480000000aULL);
+  CHECK_EQ (f8 (8, 9, 10), 0x490000000aULL);
+  CHECK_EQ (f9 (8, 9, 10), 0x4b0000000aULL);
+  CHECK_EQ (f10 (8, 9), 0x800000048ULL);
+  CHECK_EQ (f11 (8, 9), 0x800000049ULL);
+  CHECK_EQ (f12 (8, 9), 0x80000004bULL);
+  CHECK_EQ (f13 (8, 9), 0x900000048ULL);
+  CHECK_EQ (f14 (8, 9), 0x900000049ULL);
+  CHECK_EQ (f15 (8, 9), 0x90000004bULL);
+  CHECK_EQ (f16 (8, 9, 10), 0xa00000048ULL);
+  CHECK_EQ (f17 (8, 9, 10), 0xa00000049ULL);
+  CHECK_EQ (f18 (8, 9, 10), 0xa0000004bULL);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr109109-2.c b/gcc/testsuite/gcc.target/i386/pr109109-2.c
new file mode 100644
index 00000000000..48d440ada9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109109-2.c
@@ -0,0 +1,175 @@
+/* PR target/109109 */
+/* { dg-do run { target lp64 } } */
+/* { dg-options "-O2" } */
+
+unsigned long arr[64];
+
+__attribute__((noipa)) unsigned __int128
+f1 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) arr[ax]) << 64) | ax;
+}
+
+__attribute__((noipa)) unsigned __int128
+f2 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) arr[dx]) << 64) | ax;
+}
+
+__attribute__((noipa)) unsigned __int128
+f3 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | ax;
+}
+
+__attribute__((noipa)) unsigned __int128
+f4 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) arr[ax]) << 64) | dx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f5 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) arr[dx]) << 64) | dx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f6 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | dx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f7 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) arr[ax]) << 64) | cx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f8 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) arr[dx]) << 64) | cx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f9 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) ((unsigned long *) (((char *) arr) + ax))[dx]) << 64) | cx;
+}
+
+__attribute__((noipa)) unsigned __int128
+f10 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) ax) << 64) | arr[ax];
+}
+
+__attribute__((noipa)) unsigned __int128
+f11 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) ax) << 64) | arr[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f12 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) ax) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f13 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) dx) << 64) | arr[ax];
+}
+
+__attribute__((noipa)) unsigned __int128
+f14 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) dx) << 64) | arr[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f15 (unsigned long di, unsigned long si, unsigned long dx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) dx) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f16 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) cx) << 64) | arr[ax];
+}
+
+__attribute__((noipa)) unsigned __int128
+f17 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) cx) << 64) | arr[dx];
+}
+
+__attribute__((noipa)) unsigned __int128
+f18 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx)
+{
+  unsigned long ax;
+  asm ("" : "=a" (ax) : "0" (di));
+  return (((unsigned __int128) cx) << 64) | ((unsigned long *) (((char *) arr) + ax))[dx];
+}
+
+int
+main ()
+{
+  for (int i = 0; i < 64; i++)
+    arr[i] = 64 + i;
+#define CHECK_EQ(x, y1, y2) do { unsigned __int128 y = y1; y <<= 64; y += y2; if (x != y) __builtin_abort (); } while (0)
+  CHECK_EQ (f1 (8, 0, 9), 0x48, 0x8);
+  CHECK_EQ (f2 (8, 0, 9), 0x49, 0x8);
+  CHECK_EQ (f3 (8, 0, 9), 0x4a, 0x8);
+  CHECK_EQ (f4 (8, 0, 9), 0x48, 0x9);
+  CHECK_EQ (f5 (8, 0, 9), 0x49, 0x9);
+  CHECK_EQ (f6 (8, 0, 9), 0x4a, 0x9);
+  CHECK_EQ (f7 (8, 0, 9, 10), 0x48, 0xa);
+  CHECK_EQ (f8 (8, 0, 9, 10), 0x49, 0xa);
+  CHECK_EQ (f9 (8, 0, 9, 10), 0x4a, 0xa);
+  CHECK_EQ (f10 (8, 0, 9), 0x8, 0x48);
+  CHECK_EQ (f11 (8, 0, 9), 0x8, 0x49);
+  CHECK_EQ (f12 (8, 0, 9), 0x8, 0x4a);
+  CHECK_EQ (f13 (8, 0, 9), 0x9, 0x48);
+  CHECK_EQ (f14 (8, 0, 9), 0x9, 0x49);
+  CHECK_EQ (f15 (8, 0, 9), 0x9, 0x4a);
+  CHECK_EQ (f16 (8, 0, 9, 10), 0xa, 0x48);
+  CHECK_EQ (f17 (8, 0, 9, 10), 0xa, 0x49);
+  CHECK_EQ (f18 (8, 0, 9, 10), 0xa, 0x4a);
+}