public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
@ 2019-02-28 19:22 H.J. Lu
  2019-03-03 17:27 ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: H.J. Lu @ 2019-02-28 19:22 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
when 32-bit indices are used as addresses, like in

vgatherdps %ymm7, 0(,%ymm9,1), %ymm6

a 32-bit index such as 0xf7fa3010 is sign-extended to 0xfffffffff7fa3010,
which is an invalid address.  Add an addr32 prefix to UNSPEC_VSIBADDR
instructions for x32 if there is neither a base register nor a symbol.

This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with

-Ofast -funroll-loops -march=haswell

gcc/

	PR target/89523
	* config/i386/i386.c (ix86_print_operand): Also handle '_' to
	add addr32 prefix if required.
	(ix86_print_operand_punct_valid_p): Allow '_'.
	* config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
	"%_".
	(*avx512pf_gatherpf<mode>df_mask): Likewise.
	(*avx512pf_scatterpf<mode>sf_mask): Likewise.
	(*avx512pf_scatterpf<mode>df_mask): Likewise.
	(*avx2_gathersi<mode>): Likewise.
	(*avx2_gathersi<mode>_2): Likewise.
	(*avx2_gatherdi<mode>): Likewise.
	(*avx2_gatherdi<mode>_2): Likewise.
	(*avx2_gatherdi<mode>_3): Likewise.
	(*avx2_gatherdi<mode>_4): Likewise.
	(*avx512f_gathersi<mode>): Likewise.
	(*avx512f_gathersi<mode>_2): Likewise.
	(*avx512f_gatherdi<mode>): Likewise.
	(*avx512f_gatherdi<mode>_2): Likewise.
	(*avx512f_scattersi<mode>): Likewise.
	(*avx512f_scatterdi<mode>): Likewise.

gcc/testsuite/

	PR target/89523
	* gcc.target/i386/pr89523-1.c: New test.
	* gcc.target/i386/pr89523-2.c: Likewise.
	* gcc.target/i386/pr89523-3.c: Likewise.
	* gcc.target/i386/pr89523-4.c: Likewise.
	* gcc.target/i386/pr89523-5.c: Likewise.
	* gcc.target/i386/pr89523-6.c: Likewise.
	* gcc.target/i386/pr89523-7.c: Likewise.
	* gcc.target/i386/pr89523-8.c: Likewise.
	* gcc.target/i386/pr89523-9.c: Likewise.

xxx
---
 gcc/config/i386/i386.c                    | 39 ++++++++++++++++++-
 gcc/config/i386/sse.md                    | 46 +++++++++++------------
 gcc/testsuite/gcc.target/i386/pr89523-1.c | 24 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-2.c | 17 +++++++++
 gcc/testsuite/gcc.target/i386/pr89523-3.c | 17 +++++++++
 gcc/testsuite/gcc.target/i386/pr89523-4.c | 16 ++++++++
 gcc/testsuite/gcc.target/i386/pr89523-5.c | 18 +++++++++
 gcc/testsuite/gcc.target/i386/pr89523-6.c | 17 +++++++++
 gcc/testsuite/gcc.target/i386/pr89523-7.c | 19 ++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-8.c | 19 ++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-9.c | 16 ++++++++
 11 files changed, 224 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b8357a7db5d..336696136de 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17805,6 +17805,7 @@ print_reg (rtx x, int code, FILE *file)
    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
    ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
+   _ -- print addr32 prefix for x32 VSIB address without base or symbol.
  */
 
 void
@@ -18356,6 +18357,42 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	    fputs ("addr32 ", file);
 	  return;
 
+	case '_':
+	  if (TARGET_X32)
+	    {
+	      subrtx_var_iterator::array_type array;
+	      FOR_EACH_SUBRTX_VAR (iter, array,
+				   PATTERN (current_output_insn), ALL)
+		{
+		  rtx addr = *iter;
+		  if (!MEM_P (addr))
+		    continue;
+		  addr = XEXP (addr, 0);
+		  if (GET_CODE (addr) == UNSPEC
+		      && XINT (addr, 1) == UNSPEC_VSIBADDR)
+		    {
+		      /* NB: 32-bit indices in VSIB address are
+			 sign-extended to 64 bits.  In x32, the 32-bit
+			 address 0xf7fa3010 is sign-extended to
+			 0xfffffffff7fa3010, which is an invalid address.
+			 Add addr32 prefix if there is neither a base
+			 register nor a symbol.  */
+		      bool ok;
+		      struct ix86_address parts;
+		      ok = ix86_decompose_address (XVECEXP (addr, 0, 0),
+						   &parts);
+		      gcc_assert (ok && parts.index == NULL_RTX);
+		      if (parts.base == NULL_RTX
+			  && (parts.disp == NULL_RTX
+			      || !symbolic_operand (parts.disp,
+						    GET_MODE (parts.disp))))
+			fputs ("addr32 ", file);
+		      break;
+		    }
+		}
+	    }
+	  return;
+
 	case '!':
 	  if (ix86_notrack_prefixed_insn_p (current_output_insn))
 	    fputs ("notrack ", file);
@@ -18507,7 +18544,7 @@ static bool
 ix86_print_operand_punct_valid_p (unsigned char code)
 {
   return (code == '*' || code == '+' || code == '&' || code == ';'
-	  || code == '~' || code == '^' || code == '!');
+	  || code == '~' || code == '^' || code == '!' || code == '_');
 }
 \f
 /* Print a memory operand whose address is ADDR.  */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ac299495b2c..13692e47123 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17401,9 +17401,9 @@
     case 3:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17448,9 +17448,9 @@
     case 3:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17496,10 +17496,10 @@
     case 7:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17545,10 +17545,10 @@
     case 7:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%_vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -20292,7 +20292,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
+  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20312,7 +20312,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
+  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20353,7 +20353,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
+  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20375,8 +20375,8 @@
   "TARGET_AVX2"
 {
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
-    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
+    return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
+  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
@@ -20400,7 +20400,7 @@
 		     (const_int 2) (const_int 3)])))
    (clobber (match_scratch:VI4F_256 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
+  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20423,7 +20423,7 @@
 		     (const_int 2) (const_int 3)])))
    (clobber (match_scratch:VI4F_256 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
+  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20463,7 +20463,7 @@
   "TARGET_AVX512F"
 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
+  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20484,7 +20484,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
+  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20525,7 +20525,7 @@
   "TARGET_AVX512F"
 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
+  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20550,11 +20550,11 @@
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
     {
       if (<MODE_SIZE> != 64)
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
+	return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
       else
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
+	return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
     }
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
+  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
@@ -20593,7 +20593,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
+  "%_v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20631,7 +20631,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
+  "%_v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1.c b/gcc/testsuite/gcc.target/i386/pr89523-1.c
new file mode 100644
index 00000000000..f7ed24d1592
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -Ofast -funroll-loops -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+void foo (void);
+
+extern float *ncost;
+
+float
+bar (int type, int num)
+{
+  int i;
+  float cost;
+
+  cost = 0;
+  for (i = 0; i < num; i++)
+    if (type)
+      cost += ncost[i];
+    else
+      foo ();
+  return (cost);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c
new file mode 100644
index 00000000000..7423f579b5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+__m128d x;
+double *base;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i32gather_pd (base, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c
new file mode 100644
index 00000000000..606f9aac659
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+__m128d x;
+double *base;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i64gather_pd (base, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c
new file mode 100644
index 00000000000..155b818191f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+__m128d x;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i32gather_pd (NULL, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c
new file mode 100644
index 00000000000..11210ff8f78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512pf" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c
new file mode 100644
index 00000000000..0254ad435e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512pf" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32gather_pd (idx, NULL, 8, _MM_HINT_T0);
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, NULL, 8, _MM_HINT_T0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c
new file mode 100644
index 00000000000..1c357bc8505
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler "\tvscatter" } } */
+/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
+
+#include <immintrin.h>
+
+volatile __m512d src;
+volatile __m256i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i32scatter_pd (addr, idx, src, 8);
+  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c
new file mode 100644
index 00000000000..37b0a0bebb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler "\tvscatter" } } */
+/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
+
+#include <immintrin.h>
+
+volatile __m512d src;
+volatile __m512i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i64scatter_pd (addr, idx, src, 8);
+  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c
new file mode 100644
index 00000000000..a878f1e9efb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler-not "\tvscatter" } } */
+/* { dg-final { scan-assembler "addr32 vscatter" } } */
+
+#include <immintrin.h>
+
+volatile __m512d src;
+volatile __m256i idx;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i32scatter_pd (NULL, idx, src, 8);
+}
-- 
2.20.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
  2019-02-28 19:22 [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions H.J. Lu
@ 2019-03-03 17:27 ` Uros Bizjak
  2019-03-03 21:18   ` H.J. Lu
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2019-03-03 17:27 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
> when 32-bit indices are used as addresses, like in
>
> vgatherdps %ymm7, 0(,%ymm9,1), %ymm6
>
> 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
> is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
> for x32 if there is no base register nor symbol.
>
> This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with
>
> -Ofast -funroll-loops -march=haswell

1. Testcases 2 to 9 fail on fedora-29 with:

In file included from /usr/include/features.h:452,
                 from /usr/include/bits/libc-header-start.h:33,
                 from /usr/include/stdlib.h:25,
                 from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,
                 from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,
                 from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,
                 from
/home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:
/usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such
file or directory

2. Does the patch work with -maddress-mode={short,long}?

3. The implementation is wrong. You should use operand substitution
with the VSIB address as the operand, not a substitution without an operand.

4. The PR is not a regression.

Uros.

>
> gcc/
>
>         PR target/89523
>         * config/i386/i386.c (ix86_print_operand): Also handle '_' to
>         add addr32 prefix if required.
>         (ix86_print_operand_punct_valid_p): Allow '_'.
>         * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
>         "%_".
>         (*avx512pf_gatherpf<mode>df_mask): Likewise.
>         (*avx512pf_scatterpf<mode>sf_mask): Likewise.
>         (*avx512pf_scatterpf<mode>df_mask): Likewise.
>         (*avx2_gathersi<mode>): Likewise.
>         (*avx2_gathersi<mode>_2): Likewise.
>         (*avx2_gatherdi<mode>): Likewise.
>         (*avx2_gatherdi<mode>_2): Likewise.
>         (*avx2_gatherdi<mode>_3): Likewise.
>         (*avx2_gatherdi<mode>_4): Likewise.
>         (*avx512f_gathersi<mode>): Likewise.
>         (*avx512f_gathersi<mode>_2): Likewise.
>         (*avx512f_gatherdi<mode>): Likewise.
>         (*avx512f_gatherdi<mode>_2): Likewise.
>         (*avx512f_scattersi<mode>): Likewise.
>         (*avx512f_scatterdi<mode>): Likewise.
>
> gcc/testsuite/
>
>         PR target/89523
>         * gcc.target/i386/pr89523-1.c: New test.
>         * gcc.target/i386/pr89523-2.c: Likewise.
>         * gcc.target/i386/pr89523-3.c: Likewise.
>         * gcc.target/i386/pr89523-4.c: Likewise.
>         * gcc.target/i386/pr89523-5.c: Likewise.
>         * gcc.target/i386/pr89523-6.c: Likewise.
>         * gcc.target/i386/pr89523-7.c: Likewise.
>         * gcc.target/i386/pr89523-8.c: Likewise.
>         * gcc.target/i386/pr89523-9.c: Likewise.
>
> xxx
> ---
>  gcc/config/i386/i386.c                    | 39 ++++++++++++++++++-
>  gcc/config/i386/sse.md                    | 46 +++++++++++------------
>  gcc/testsuite/gcc.target/i386/pr89523-1.c | 24 ++++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-2.c | 17 +++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-3.c | 17 +++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-4.c | 16 ++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-5.c | 18 +++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-6.c | 17 +++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-7.c | 19 ++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-8.c | 19 ++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-9.c | 16 ++++++++
>  11 files changed, 224 insertions(+), 24 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index b8357a7db5d..336696136de 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -17805,6 +17805,7 @@ print_reg (rtx x, int code, FILE *file)
>     ~ -- print "i" if TARGET_AVX2, "f" otherwise.
>     ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
>     ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
> +   _ -- print addr32 prefix if required.
>   */
>
>  void
> @@ -18356,6 +18357,42 @@ ix86_print_operand (FILE *file, rtx x, int code)
>             fputs ("addr32 ", file);
>           return;
>
> +       case '_':
> +         if (TARGET_X32)
> +           {
> +             subrtx_var_iterator::array_type array;
> +             FOR_EACH_SUBRTX_VAR (iter, array,
> +                                  PATTERN (current_output_insn), ALL)
> +               {
> +                 rtx addr = *iter;
> +                 if (!MEM_P (addr))
> +                   continue;
> +                 addr = XEXP (addr, 0);
> +                 if (GET_CODE (addr) == UNSPEC
> +                     && XINT (addr, 1) == UNSPEC_VSIBADDR)
> +                   {
> +                     /* NB: 32-bit indices in VSIB address are
> +                        sign-extended to 64 bits. In x32, if 32-bit
> +                        address 0xf7fa3010 is sign-extended to
> +                        0xfffffffff7fa3010 which is invalid address.
> +                        Add addr32 prefix if there is no base register
> +                        nor symbol.  */
> +                     bool ok;
> +                     struct ix86_address parts;
> +                     ok = ix86_decompose_address (XVECEXP (addr, 0, 0),
> +                                                  &parts);
> +                     gcc_assert (ok && parts.index == NULL_RTX);
> +                     if (parts.base == NULL_RTX
> +                         && (parts.disp == NULL_RTX
> +                             || !symbolic_operand (parts.disp,
> +                                                   GET_MODE (parts.disp))))
> +                       fputs ("addr32 ", file);
> +                     break;
> +                   }
> +               }
> +           }
> +         return;
> +
>         case '!':
>           if (ix86_notrack_prefixed_insn_p (current_output_insn))
>             fputs ("notrack ", file);
> @@ -18507,7 +18544,7 @@ static bool
>  ix86_print_operand_punct_valid_p (unsigned char code)
>  {
>    return (code == '*' || code == '+' || code == '&' || code == ';'
> -         || code == '~' || code == '^' || code == '!');
> +         || code == '~' || code == '^' || code == '!' || code == '_');
>  }
>
>  /* Print a memory operand whose address is ADDR.  */
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index ac299495b2c..13692e47123 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -17401,9 +17401,9 @@
>      case 3:
>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>          gas changed what it requires incompatibly.  */
> -      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%_vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>      case 2:
> -      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%_vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>      default:
>        gcc_unreachable ();
>      }
> @@ -17448,9 +17448,9 @@
>      case 3:
>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>          gas changed what it requires incompatibly.  */
> -      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%_vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
>      case 2:
> -      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%_vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
>      default:
>        gcc_unreachable ();
>      }
> @@ -17496,10 +17496,10 @@
>      case 7:
>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>          gas changed what it requires incompatibly.  */
> -      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%_vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>      case 2:
>      case 6:
> -      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%_vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>      default:
>        gcc_unreachable ();
>      }
> @@ -17545,10 +17545,10 @@
>      case 7:
>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>          gas changed what it requires incompatibly.  */
> -      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%_vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
>      case 2:
>      case 6:
> -      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%_vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
>      default:
>        gcc_unreachable ();
>      }
> @@ -20292,7 +20292,7 @@
>           UNSPEC_GATHER))
>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
> +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20312,7 +20312,7 @@
>           UNSPEC_GATHER))
>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
> +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20353,7 +20353,7 @@
>           UNSPEC_GATHER))
>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
> +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20375,8 +20375,8 @@
>    "TARGET_AVX2"
>  {
>    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
> -    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
> -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
> +    return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
> +  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
>  }
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
> @@ -20400,7 +20400,7 @@
>                      (const_int 2) (const_int 3)])))
>     (clobber (match_scratch:VI4F_256 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
> +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20423,7 +20423,7 @@
>                      (const_int 2) (const_int 3)])))
>     (clobber (match_scratch:VI4F_256 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
> +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20463,7 +20463,7 @@
>    "TARGET_AVX512F"
>  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
> +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20484,7 +20484,7 @@
>    "TARGET_AVX512F"
>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
> +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20525,7 +20525,7 @@
>    "TARGET_AVX512F"
>  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
> +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20550,11 +20550,11 @@
>    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
>      {
>        if (<MODE_SIZE> != 64)
> -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
> +       return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
>        else
> -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
> +       return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
>      }
> -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
> +  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
>  }
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
> @@ -20593,7 +20593,7 @@
>    "TARGET_AVX512F"
>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> +  "%_v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20631,7 +20631,7 @@
>    "TARGET_AVX512F"
>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> +  "%_v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1.c b/gcc/testsuite/gcc.target/i386/pr89523-1.c
> new file mode 100644
> index 00000000000..f7ed24d1592
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-1.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -Ofast -funroll-loops -march=haswell" } */
> +/* { dg-final { scan-assembler-not "\tvgather" } } */
> +/* { dg-final { scan-assembler "addr32 vgather" } } */
> +
> +void foo (void);
> +
> +extern float *ncost;
> +
> +float
> +bar (int type, int num)
> +{
> +  int i;
> +  float cost;
> +
> +  cost = 0;
> +  for (i = 0; i < num; i++)
> +    if (type)
> +      cost += ncost[i];
> +    else
> +      foo ();
> +  return (cost);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c
> new file mode 100644
> index 00000000000..7423f579b5e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -march=haswell" } */
> +/* { dg-final { scan-assembler "\tvgather" } } */
> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> +
> +#include <immintrin.h>
> +
> +__m128d x;
> +double *base;
> +__m128i idx;
> +
> +void extern
> +avx2_test (void)
> +{
> +  x = _mm_i32gather_pd (base, idx, 1);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c
> new file mode 100644
> index 00000000000..606f9aac659
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -march=haswell" } */
> +/* { dg-final { scan-assembler "\tvgather" } } */
> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> +
> +#include <immintrin.h>
> +
> +__m128d x;
> +double *base;
> +__m128i idx;
> +
> +void extern
> +avx2_test (void)
> +{
> +  x = _mm_i64gather_pd (base, idx, 1);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c
> new file mode 100644
> index 00000000000..155b818191f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -march=haswell" } */
> +/* { dg-final { scan-assembler-not "\tvgather" } } */
> +/* { dg-final { scan-assembler "addr32 vgather" } } */
> +
> +#include <immintrin.h>
> +
> +__m128d x;
> +__m128i idx;
> +
> +void extern
> +avx2_test (void)
> +{
> +  x = _mm_i32gather_pd (NULL, idx, 1);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c
> new file mode 100644
> index 00000000000..11210ff8f78
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512pf" } */
> +/* { dg-final { scan-assembler "\tvgather" } } */
> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m256i idx;
> +volatile __mmask8 m8;
> +void *base;
> +
> +void extern
> +avx512pf_test (void)
> +{
> +  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);
> +  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c
> new file mode 100644
> index 00000000000..0254ad435e3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512pf" } */
> +/* { dg-final { scan-assembler-not "\tvgather" } } */
> +/* { dg-final { scan-assembler "addr32 vgather" } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m256i idx;
> +volatile __mmask8 m8;
> +
> +void extern
> +avx512pf_test (void)
> +{
> +  _mm512_prefetch_i32gather_pd (idx, NULL, 8, _MM_HINT_T0);
> +  _mm512_mask_prefetch_i32gather_pd (idx, m8, NULL, 8, _MM_HINT_T0);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c
> new file mode 100644
> index 00000000000..1c357bc8505
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512f" } */
> +/* { dg-final { scan-assembler "\tvscatter" } } */
> +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m512d src;
> +volatile __m256i idx;
> +volatile __mmask8 m8;
> +double *addr;
> +
> +void extern
> +avx512f_test (void)
> +{
> +  _mm512_i32scatter_pd (addr, idx, src, 8);
> +  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c
> new file mode 100644
> index 00000000000..37b0a0bebb7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512f" } */
> +/* { dg-final { scan-assembler "\tvscatter" } } */
> +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m512d src;
> +volatile __m512i idx;
> +volatile __mmask8 m8;
> +double *addr;
> +
> +void extern
> +avx512f_test (void)
> +{
> +  _mm512_i64scatter_pd (addr, idx, src, 8);
> +  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c
> new file mode 100644
> index 00000000000..a878f1e9efb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512f" } */
> +/* { dg-final { scan-assembler-not "\tvscatter" } } */
> +/* { dg-final { scan-assembler "addr32 vscatter" } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m512d src;
> +volatile __m256i idx;
> +
> +void extern
> +avx512f_test (void)
> +{
> +  _mm512_i32scatter_pd (NULL, idx, src, 8);
> +}
> --
> 2.20.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
  2019-03-03 17:27 ` Uros Bizjak
@ 2019-03-03 21:18   ` H.J. Lu
  2019-03-03 21:34     ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: H.J. Lu @ 2019-03-03 21:18 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
> > when 32-bit indices are used as addresses, like in
> >
> > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6
> >
> > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
> > is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
> > for x32 if there is no base register nor symbol.
> >
> > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with
> >
> > -Ofast -funroll-loops -march=haswell
>
> 1. Testcases 2 to 9 fail on fedora-29 with:
>
> In file included from /usr/include/features.h:452,
>                  from /usr/include/bits/libc-header-start.h:33,
>                  from /usr/include/stdlib.h:25,
>                  from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,
>                  from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,
>                  from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,
>                  from
> /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:
> /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such
> file or directory

I will update the tests to remove "#include <immintrin.h>".

> 2. Does the patch work with -maddress-mode={short,long}?

Yes.

> 3. The implementation is wrong. You should use operand substitution
> with VSIB address as operand, not substitution without operand.

How can I add an addr32 prefix with operand substitution?  This is
very similar to "%^".  My updated patch will use "%^".

> 4. The PR is not a regression.

Correct.

H.J.
> Uros.
>
> >
> > gcc/
> >
> >         PR target/89523
> >         * config/i386/i386.c (ix86_print_operand): Also handle '_' to
> >         add addr32 prefix if required.
> >         (ix86_print_operand_punct_valid_p): Allow '_'.
> >         * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
> >         "%_".
> >         (*avx512pf_gatherpf<mode>df_mask): Likewise.
> >         (*avx512pf_scatterpf<mode>sf_mask): Likewise.
> >         (*avx512pf_scatterpf<mode>df_mask): Likewise.
> >         (*avx2_gathersi<mode>): Likewise.
> >         (*avx2_gathersi<mode>_2): Likewise.
> >         (*avx2_gatherdi<mode>): Likewise.
> >         (*avx2_gatherdi<mode>_2): Likewise.
> >         (*avx2_gatherdi<mode>_3): Likewise.
> >         (*avx2_gatherdi<mode>_4): Likewise.
> >         (*avx512f_gathersi<mode>): Likewise.
> >         (*avx512f_gathersi<mode>_2): Likewise.
> >         (*avx512f_gatherdi<mode>): Likewise.
> >         (*avx512f_gatherdi<mode>_2): Likewise.
> >         (*avx512f_scattersi<mode>): Likewise.
> >         (*avx512f_scatterdi<mode>): Likewise.
> >
> > gcc/testsuite/
> >
> >         PR target/89523
> >         * gcc.target/i386/pr89523-1.c: New test.
> >         * gcc.target/i386/pr89523-2.c: Likewise.
> >         * gcc.target/i386/pr89523-3.c: Likewise.
> >         * gcc.target/i386/pr89523-4.c: Likewise.
> >         * gcc.target/i386/pr89523-5.c: Likewise.
> >         * gcc.target/i386/pr89523-6.c: Likewise.
> >         * gcc.target/i386/pr89523-7.c: Likewise.
> >         * gcc.target/i386/pr89523-8.c: Likewise.
> >         * gcc.target/i386/pr89523-9.c: Likewise.
> >
> > xxx
> > ---
> >  gcc/config/i386/i386.c                    | 39 ++++++++++++++++++-
> >  gcc/config/i386/sse.md                    | 46 +++++++++++------------
> >  gcc/testsuite/gcc.target/i386/pr89523-1.c | 24 ++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-2.c | 17 +++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-3.c | 17 +++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-4.c | 16 ++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-5.c | 18 +++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-6.c | 17 +++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-7.c | 19 ++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-8.c | 19 ++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-9.c | 16 ++++++++
> >  11 files changed, 224 insertions(+), 24 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index b8357a7db5d..336696136de 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -17805,6 +17805,7 @@ print_reg (rtx x, int code, FILE *file)
> >     ~ -- print "i" if TARGET_AVX2, "f" otherwise.
> >     ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
> >     ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
> > +   _ -- print addr32 prefix if required.
> >   */
> >
> >  void
> > @@ -18356,6 +18357,42 @@ ix86_print_operand (FILE *file, rtx x, int code)
> >             fputs ("addr32 ", file);
> >           return;
> >
> > +       case '_':
> > +         if (TARGET_X32)
> > +           {
> > +             subrtx_var_iterator::array_type array;
> > +             FOR_EACH_SUBRTX_VAR (iter, array,
> > +                                  PATTERN (current_output_insn), ALL)
> > +               {
> > +                 rtx addr = *iter;
> > +                 if (!MEM_P (addr))
> > +                   continue;
> > +                 addr = XEXP (addr, 0);
> > +                 if (GET_CODE (addr) == UNSPEC
> > +                     && XINT (addr, 1) == UNSPEC_VSIBADDR)
> > +                   {
> > +                     /* NB: 32-bit indices in VSIB address are
> > +                        sign-extended to 64 bits. In x32, if 32-bit
> > +                        address 0xf7fa3010 is sign-extended to
> > +                        0xfffffffff7fa3010 which is invalid address.
> > +                        Add addr32 prefix if there is no base register
> > +                        nor symbol.  */
> > +                     bool ok;
> > +                     struct ix86_address parts;
> > +                     ok = ix86_decompose_address (XVECEXP (addr, 0, 0),
> > +                                                  &parts);
> > +                     gcc_assert (ok && parts.index == NULL_RTX);
> > +                     if (parts.base == NULL_RTX
> > +                         && (parts.disp == NULL_RTX
> > +                             || !symbolic_operand (parts.disp,
> > +                                                   GET_MODE (parts.disp))))
> > +                       fputs ("addr32 ", file);
> > +                     break;
> > +                   }
> > +               }
> > +           }
> > +         return;
> > +
> >         case '!':
> >           if (ix86_notrack_prefixed_insn_p (current_output_insn))
> >             fputs ("notrack ", file);
> > @@ -18507,7 +18544,7 @@ static bool
> >  ix86_print_operand_punct_valid_p (unsigned char code)
> >  {
> >    return (code == '*' || code == '+' || code == '&' || code == ';'
> > -         || code == '~' || code == '^' || code == '!');
> > +         || code == '~' || code == '^' || code == '!' || code == '_');
> >  }
> >
> >  /* Print a memory operand whose address is ADDR.  */
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index ac299495b2c..13692e47123 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -17401,9 +17401,9 @@
> >      case 3:
> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >          gas changed what it requires incompatibly.  */
> > -      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%_vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >      case 2:
> > -      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%_vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >      default:
> >        gcc_unreachable ();
> >      }
> > @@ -17448,9 +17448,9 @@
> >      case 3:
> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >          gas changed what it requires incompatibly.  */
> > -      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%_vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> >      case 2:
> > -      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%_vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> >      default:
> >        gcc_unreachable ();
> >      }
> > @@ -17496,10 +17496,10 @@
> >      case 7:
> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >          gas changed what it requires incompatibly.  */
> > -      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%_vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >      case 2:
> >      case 6:
> > -      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%_vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >      default:
> >        gcc_unreachable ();
> >      }
> > @@ -17545,10 +17545,10 @@
> >      case 7:
> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >          gas changed what it requires incompatibly.  */
> > -      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%_vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> >      case 2:
> >      case 6:
> > -      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%_vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> >      default:
> >        gcc_unreachable ();
> >      }
> > @@ -20292,7 +20292,7 @@
> >           UNSPEC_GATHER))
> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
> > +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20312,7 +20312,7 @@
> >           UNSPEC_GATHER))
> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
> > +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20353,7 +20353,7 @@
> >           UNSPEC_GATHER))
> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
> > +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20375,8 +20375,8 @@
> >    "TARGET_AVX2"
> >  {
> >    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
> > -    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
> > -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
> > +    return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
> > +  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
> >  }
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> > @@ -20400,7 +20400,7 @@
> >                      (const_int 2) (const_int 3)])))
> >     (clobber (match_scratch:VI4F_256 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
> > +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20423,7 +20423,7 @@
> >                      (const_int 2) (const_int 3)])))
> >     (clobber (match_scratch:VI4F_256 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
> > +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20463,7 +20463,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
> > +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20484,7 +20484,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
> > +  "%_v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20525,7 +20525,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
> > +  "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20550,11 +20550,11 @@
> >    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
> >      {
> >        if (<MODE_SIZE> != 64)
> > -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
> > +       return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
> >        else
> > -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
> > +       return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
> >      }
> > -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
> > +  return "%_v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
> >  }
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> > @@ -20593,7 +20593,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> > +  "%_v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20631,7 +20631,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> > +  "%_v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1.c b/gcc/testsuite/gcc.target/i386/pr89523-1.c
> > new file mode 100644
> > index 00000000000..f7ed24d1592
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-1.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -Ofast -funroll-loops -march=haswell" } */
> > +/* { dg-final { scan-assembler-not "\tvgather" } } */
> > +/* { dg-final { scan-assembler "addr32 vgather" } } */
> > +
> > +void foo (void);
> > +
> > +extern float *ncost;
> > +
> > +float
> > +bar (int type, int num)
> > +{
> > +  int i;
> > +  float cost;
> > +
> > +  cost = 0;
> > +  for (i = 0; i < num; i++)
> > +    if (type)
> > +      cost += ncost[i];
> > +    else
> > +      foo ();
> > +  return (cost);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c
> > new file mode 100644
> > index 00000000000..7423f579b5e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -march=haswell" } */
> > +/* { dg-final { scan-assembler "\tvgather" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> > +
> > +#include <immintrin.h>
> > +
> > +__m128d x;
> > +double *base;
> > +__m128i idx;
> > +
> > +void extern
> > +avx2_test (void)
> > +{
> > +  x = _mm_i32gather_pd (base, idx, 1);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c
> > new file mode 100644
> > index 00000000000..606f9aac659
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -march=haswell" } */
> > +/* { dg-final { scan-assembler "\tvgather" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> > +
> > +#include <immintrin.h>
> > +
> > +__m128d x;
> > +double *base;
> > +__m128i idx;
> > +
> > +void extern
> > +avx2_test (void)
> > +{
> > +  x = _mm_i64gather_pd (base, idx, 1);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c
> > new file mode 100644
> > index 00000000000..155b818191f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c
> > @@ -0,0 +1,16 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -march=haswell" } */
> > +/* { dg-final { scan-assembler-not "\tvgather" } } */
> > +/* { dg-final { scan-assembler "addr32 vgather" } } */
> > +
> > +#include <immintrin.h>
> > +
> > +__m128d x;
> > +__m128i idx;
> > +
> > +void extern
> > +avx2_test (void)
> > +{
> > +  x = _mm_i32gather_pd (NULL, idx, 1);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c
> > new file mode 100644
> > index 00000000000..11210ff8f78
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512pf" } */
> > +/* { dg-final { scan-assembler "\tvgather" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> > +
> > +#include <immintrin.h>
> > +
> > +volatile __m256i idx;
> > +volatile __mmask8 m8;
> > +void *base;
> > +
> > +void extern
> > +avx512pf_test (void)
> > +{
> > +  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);
> > +  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c
> > new file mode 100644
> > index 00000000000..0254ad435e3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512pf" } */
> > +/* { dg-final { scan-assembler-not "\tvgather" } } */
> > +/* { dg-final { scan-assembler "addr32 vgather" } } */
> > +
> > +#include <immintrin.h>
> > +
> > +volatile __m256i idx;
> > +volatile __mmask8 m8;
> > +
> > +void extern
> > +avx512pf_test (void)
> > +{
> > +  _mm512_prefetch_i32gather_pd (idx, NULL, 8, _MM_HINT_T0);
> > +  _mm512_mask_prefetch_i32gather_pd (idx, m8, NULL, 8, _MM_HINT_T0);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c
> > new file mode 100644
> > index 00000000000..1c357bc8505
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512f" } */
> > +/* { dg-final { scan-assembler "\tvscatter" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
> > +
> > +#include <immintrin.h>
> > +
> > +volatile __m512d src;
> > +volatile __m256i idx;
> > +volatile __mmask8 m8;
> > +double *addr;
> > +
> > +void extern
> > +avx512f_test (void)
> > +{
> > +  _mm512_i32scatter_pd (addr, idx, src, 8);
> > +  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c
> > new file mode 100644
> > index 00000000000..37b0a0bebb7
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512f" } */
> > +/* { dg-final { scan-assembler "\tvscatter" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
> > +
> > +#include <immintrin.h>
> > +
> > +volatile __m512d src;
> > +volatile __m512i idx;
> > +volatile __mmask8 m8;
> > +double *addr;
> > +
> > +void extern
> > +avx512f_test (void)
> > +{
> > +  _mm512_i64scatter_pd (addr, idx, src, 8);
> > +  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c
> > new file mode 100644
> > index 00000000000..a878f1e9efb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c
> > @@ -0,0 +1,16 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512f" } */
> > +/* { dg-final { scan-assembler-not "\tvscatter" } } */
> > +/* { dg-final { scan-assembler "addr32 vscatter" } } */
> > +
> > +#include <immintrin.h>
> > +
> > +volatile __m512d src;
> > +volatile __m256i idx;
> > +
> > +void extern
> > +avx512f_test (void)
> > +{
> > +  _mm512_i32scatter_pd (NULL, idx, src, 8);
> > +}
> > --
> > 2.20.1
> >



-- 
H.J.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
  2019-03-03 21:18   ` H.J. Lu
@ 2019-03-03 21:34     ` Uros Bizjak
  2019-03-04 13:54       ` V2 " H.J. Lu
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2019-03-03 21:34 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Sun, Mar 3, 2019 at 10:18 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
> > > when 32-bit indices are used as addresses, like in
> > >
> > > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6
> > >
> > > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
> > > is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
> > > for x32 if there is no base register nor symbol.
> > >
> > > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with
> > >
> > > -Ofast -funroll-loops -march=haswell
> >
> > 1. Testcases 2 to 9 fail on fedora-29 with:
> >
> > In file included from /usr/include/features.h:452,
> >                  from /usr/include/bits/libc-header-start.h:33,
> >                  from /usr/include/stdlib.h:25,
> >                  from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,
> >                  from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,
> >                  from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,
> >                  from
> > /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:
> > /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such
> > file or directory
>
> I will update tests to remove  "#include immintrin.h"
>
> > 2. Does the patch work with -maddress-mode={short,long}?
>
> Yes.
>
> > 3. The implementation is wrong. You should use operand substitution
> > with VSIB address as operand, not substitution without operand.
>
> How can I add an addr32 prefix with operand substitution?  This is
> very similar to "%^".  My updated patch will use "%^".

Yes, using %^ is what I think would be the optimal solution. Other
than that, in your proposed patch, operand-less %_ scans the entire
current_output_insn to dig to the UNSPEC_VSIBADDR. You can just use
operand substitution, and do e.g. "%X2vgatherpf0..." where 'X'
processes operand 2 (vsib_address_operand) and conditionally outputs
addr32.

BTW: In a new version of the patch, please specify what is changed
from the previous version. Otherwise, review of a new version is more
or less a guesswork what changed.

Uros.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* V2 [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
  2019-03-03 21:34     ` Uros Bizjak
@ 2019-03-04 13:54       ` H.J. Lu
  2019-03-04 14:09         ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: H.J. Lu @ 2019-03-04 13:54 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Sun, Mar 03, 2019 at 10:34:29PM +0100, Uros Bizjak wrote:
> On Sun, Mar 3, 2019 at 10:18 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
> > > > when 32-bit indices are used as addresses, like in
> > > >
> > > > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6
> > > >
> > > > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
> > > > is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
> > > > for x32 if there is no base register nor symbol.
> > > >
> > > > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with
> > > >
> > > > -Ofast -funroll-loops -march=haswell
> > >
> > > 1. Testcases 2 to 9 fail on fedora-29 with:
> > >
> > > In file included from /usr/include/features.h:452,
> > >                  from /usr/include/bits/libc-header-start.h:33,
> > >                  from /usr/include/stdlib.h:25,
> > >                  from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,
> > >                  from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,
> > >                  from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,
> > >                  from
> > > /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:
> > > /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such
> > > file or directory
> >
> > I will update tests to remove  "#include immintrin.h"
> >
> > > 2. Does the patch work with -maddress-mode={short,long}?
> >
> > Yes.
> >
> > > 3. The implementation is wrong. You should use operand substitution
> > > with VSIB address as operand, not substitution without operand.
> >
> > How can I add an addr32 prefix with operand substitution?  This is
> > very similar to "%^".  My updated patch will use "%^".
> 
> Yes, using %^ is what I think would be the optimal solution. Other
> than that, in your proposed patch, operand-less %_ scans the entire
> current_output_insn to dig to the UNSPEC_VSIBADDR. You can just use
> operand substitution, and do e.g. "%X2vgatherpf0..." where 'X'
> processes operand 2 (vsib_address_operand) and conditionally outputs
> addr32.
> 
> BTW: In a new version of the patch, please specify what is changed
> from the previous version. Otherwise, review of a new version is more
> or less a guesswork what changed.
> 

Here is the updated patch.  The change is

	return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

instead of

	return "%^vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

We can't use %X5 for the prefix, since the 'X' modifier is already used on operands.

I also added a test for -maddress-mode=long.


H.J.
---
32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
when 32-bit indices are used as addresses, like in

vgatherdps %ymm7, 0(,%ymm9,1), %ymm6

32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
for x32 if there is no base register nor symbol.

This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with

-Ofast -funroll-loops -march=haswell

gcc/

	PR target/89523
	* config/i386/i386.c (ix86_print_operand): Handle UNSPEC_VSIBADDR
	instructions for '%P' to add addr32 prefix if required.
	* config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
	"%P5" to opcode.
	(*avx512pf_gatherpf<mode>df_mask): Likewise.
	(*avx512pf_scatterpf<mode>sf_mask): Likewise.
	(*avx512pf_scatterpf<mode>df_mask): Likewise.
	(*avx2_gathersi<mode>): Prepend "%P7" to opcode.
	(*avx2_gathersi<mode>_2): Prepend "%P6" to opcode.
	(*avx2_gatherdi<mode>): Prepend "%P7" to opcode.
	(*avx2_gatherdi<mode>_2): Prepend "%P6" to opcode.
	(*avx2_gatherdi<mode>_3): Prepend "%P7" to opcode.
	(*avx2_gatherdi<mode>_4): Prepend "%P6" to opcode.
	(*avx512f_gathersi<mode>): Prepend "%P6" to opcode.
	(*avx512f_gathersi<mode>_2): Prepend "%P5" to opcode.
	(*avx512f_gatherdi<mode>): Prepend "%P6" to opcode.
	(*avx512f_gatherdi<mode>_2): Prepend "%P5" to opcode.
	(*avx512f_scattersi<mode>): Likewise.
	(*avx512f_scatterdi<mode>): Likewise.

gcc/testsuite/

	PR target/89523
	* gcc.target/i386/pr89523-1a.c: New test.
	* gcc.target/i386/pr89523-1b.c: Likewise.
	* gcc.target/i386/pr89523-2.c: Likewise.
	* gcc.target/i386/pr89523-3.c: Likewise.
	* gcc.target/i386/pr89523-4.c: Likewise.
	* gcc.target/i386/pr89523-5.c: Likewise.
	* gcc.target/i386/pr89523-6.c: Likewise.
	* gcc.target/i386/pr89523-7.c: Likewise.
	* gcc.target/i386/pr89523-8.c: Likewise.
	* gcc.target/i386/pr89523-9.c: Likewise.
---
 gcc/config/i386/i386.c                     | 35 +++++++++++++++-
 gcc/config/i386/sse.md                     | 46 +++++++++++-----------
 gcc/testsuite/gcc.target/i386/pr89523-1a.c | 24 +++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-1b.c |  7 ++++
 gcc/testsuite/gcc.target/i386/pr89523-2.c  | 37 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-3.c  | 36 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-4.c  | 36 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-5.c  | 39 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-6.c  | 38 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-7.c  | 42 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-8.c  | 41 +++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-9.c  | 30 ++++++++++++++
 12 files changed, 386 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c8f9957163b..ae9befb638d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17793,7 +17793,8 @@ print_reg (rtx x, int code, FILE *file)
    y -- print "st(0)" instead of "st" as a register.
    d -- print duplicated register operand for AVX instruction.
    D -- print condition for SSE cmp instruction.
-   P -- if PIC, print an @PLT suffix.
+   P -- if PIC, print an @PLT suffix or print addr32 prefix for
+	TARGET_X32 with UNSPEC_VSIBADDR operand.
    p -- print raw symbol name.
    X -- don't print any sort of PIC '@' suffix for a symbol.
    & -- print some in-use local-dynamic symbol name.
@@ -18010,6 +18011,37 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	  output_operand_lossage ("invalid operand size for operand code 'Z'");
 	  return;
 
+	case 'P':
+	  if (MEM_P (x))
+	    {
+	      x = XEXP (x, 0);
+	      if (GET_CODE (x) == UNSPEC
+		  && XINT (x, 1) == UNSPEC_VSIBADDR)
+		{
+		  if (TARGET_X32)
+		    {
+		      /* NB: 32-bit indices in VSIB address are
+			 sign-extended to 64 bits.  In x32, a 32-bit
+			 address such as 0xf7fa3010 is sign-extended to
+			 0xfffffffff7fa3010, which is an invalid address.
+			 Add addr32 prefix if there is no base register
+			 nor symbol.  */
+		      bool ok;
+		      struct ix86_address parts;
+		      ok = ix86_decompose_address (XVECEXP (x, 0, 0),
+						   &parts);
+		      gcc_assert (ok && parts.index == NULL_RTX);
+		      if (parts.base == NULL_RTX
+			  && (parts.disp == NULL_RTX
+			      || !symbolic_operand (parts.disp,
+						    GET_MODE (parts.disp))))
+			fputs ("addr32 ", file);
+		    }
+		  return;
+		}
+	    }
+	  break;
+
 	case 'd':
 	case 'b':
 	case 'w':
@@ -18021,7 +18053,6 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	case 'y':
 	case 'x':
 	case 'X':
-	case 'P':
 	case 'p':
 	case 'V':
 	  break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ac299495b2c..ac500f9cc63 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17401,9 +17401,9 @@
     case 3:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%P5vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%P5vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17448,9 +17448,9 @@
     case 3:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%P5vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%P5vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17496,10 +17496,10 @@
     case 7:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%P5vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17545,10 +17545,10 @@
     case 7:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%P5vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%P5vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -20292,7 +20292,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
+  "%P7v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20312,7 +20312,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
+  "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20353,7 +20353,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
+  "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20375,8 +20375,8 @@
   "TARGET_AVX2"
 {
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
-    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
+    return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
+  return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
@@ -20400,7 +20400,7 @@
 		     (const_int 2) (const_int 3)])))
    (clobber (match_scratch:VI4F_256 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
+  "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20423,7 +20423,7 @@
 		     (const_int 2) (const_int 3)])))
    (clobber (match_scratch:VI4F_256 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
+  "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20463,7 +20463,7 @@
   "TARGET_AVX512F"
 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
+  "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20484,7 +20484,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
+  "%P5v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20525,7 +20525,7 @@
   "TARGET_AVX512F"
 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
+  "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20550,11 +20550,11 @@
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
     {
       if (<MODE_SIZE> != 64)
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
+	return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
       else
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
+	return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
     }
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
+  return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
@@ -20593,7 +20593,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
+  "%P5v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20631,7 +20631,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
+  "%P5v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1a.c b/gcc/testsuite/gcc.target/i386/pr89523-1a.c
new file mode 100644
index 00000000000..0d0edab0363
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-1a.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-maddress-mode=short -mx32 -Ofast -funroll-loops -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+void foo (void);
+
+extern float *ncost;
+
+float
+bar (int type, int num)
+{
+  int i;
+  float cost;
+
+  cost = 0;
+  for (i = 0; i < num; i++)
+    if (type)
+      cost += ncost[i];
+    else
+      foo ();
+  return (cost);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1b.c b/gcc/testsuite/gcc.target/i386/pr89523-1b.c
new file mode 100644
index 00000000000..6a5c1d43625
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-1b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-maddress-mode=long -mx32 -Ofast -funroll-loops -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+#include "pr89523-1a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c
new file mode 100644
index 00000000000..2ffbffe5e40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v2df __zero = { 0.0, 0.0 };
+  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
+  __v2df x = x;
+
+  return (__m128d) __builtin_ia32_gathersiv2df (x,
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
+}
+
+__m128d x;
+double *base;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i32gather_pd (base, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c
new file mode 100644
index 00000000000..fc3631b694b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v2df __zero = { 0.0, 0.0 };
+  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
+
+  return (__m128d) __builtin_ia32_gatherdiv2df (__zero,
+						__base,
+						(__v2di)__index,
+						__mask,
+						__scale);
+}
+
+__m128d x;
+double *base;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i64gather_pd (base, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c
new file mode 100644
index 00000000000..3436e5dcae3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v2df __zero = { 0.0, 0.0 };
+  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
+  __v2df x = x;
+
+  return (__m128d) __builtin_ia32_gathersiv2df (x,
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
+}
+
+__m128d x;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i32gather_pd ((void *) 0, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c
new file mode 100644
index 00000000000..6a769c7a249
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512pf" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+					  __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
+			      int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
+			      __scale, __hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
+				   void const *__addr, int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
+			      __hint);
+}
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32gather_pd (idx, base, 8, 3);
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c
new file mode 100644
index 00000000000..82f795e085c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512pf" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+					  __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
+			      int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
+			      __scale, __hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
+				   void const *__addr, int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
+			      __hint);
+}
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32gather_pd (idx, (void *) 0, 8, 3);
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, (void *) 0, 8, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c
new file mode 100644
index 00000000000..030b00d268a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c
@@ -0,0 +1,42 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler "\tvscatter" } } */
+/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
+
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+					  __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
+		      int __scale)
+{
+  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
+				(__v8si) __index, (__v8df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
+			   __m256i __index, __m512d __v1, int __scale)
+{
+  __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
+				(__v8df) __v1, __scale);
+}
+
+volatile __m512d src;
+volatile __m256i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i32scatter_pd (addr, idx, src, 8);
+  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c
new file mode 100644
index 00000000000..465c985c2b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c
@@ -0,0 +1,41 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler "\tvscatter" } } */
+/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
+
+typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
+		      int __scale)
+{
+  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
+				(__v8di) __index, (__v8df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
+			   __m512i __index, __m512d __v1, int __scale)
+{
+  __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
+				(__v8df) __v1, __scale);
+}
+
+volatile __m512d src;
+volatile __m512i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i64scatter_pd (addr, idx, src, 8);
+  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c
new file mode 100644
index 00000000000..e9323126bd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler-not "\tvscatter" } } */
+/* { dg-final { scan-assembler "addr32 vscatter" } } */
+
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+					  __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
+		      int __scale)
+{
+  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
+				(__v8si) __index, (__v8df) __v1, __scale);
+}
+
+volatile __m512d src;
+volatile __m256i idx;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i32scatter_pd ((void *) 0, idx, src, 8);
+}
-- 
2.20.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: V2 [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
  2019-03-04 13:54       ` V2 " H.J. Lu
@ 2019-03-04 14:09         ` Uros Bizjak
  2019-03-14  8:34           ` H.J. Lu
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2019-03-04 14:09 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches

On Mon, Mar 4, 2019 at 2:54 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sun, Mar 03, 2019 at 10:34:29PM +0100, Uros Bizjak wrote:
> > On Sun, Mar 3, 2019 at 10:18 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > >
> > > > On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
> > > > > when 32-bit indices are used as addresses, like in
> > > > >
> > > > > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6
> > > > >
> > > > > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
> > > > > is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
> > > > > for x32 if there is no base register nor symbol.
> > > > >
> > > > > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with
> > > > >
> > > > > -Ofast -funroll-loops -march=haswell
> > > >
> > > > 1. Testcases 2 to 9 fail on fedora-29 with:
> > > >
> > > > In file included from /usr/include/features.h:452,
> > > >                  from /usr/include/bits/libc-header-start.h:33,
> > > >                  from /usr/include/stdlib.h:25,
> > > >                  from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,
> > > >                  from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,
> > > >                  from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,
> > > >                  from
> > > > /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:
> > > > /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such
> > > > file or directory
> > >
> > > I will update tests to remove  "#include immintrin.h"
> > >
> > > > 2. Does the patch work with -maddress-mode={short,long}?
> > >
> > > Yes.
> > >
> > > > 3. The implementation is wrong. You should use operand substitution
> > > > with VSIB address as operand, not substitution without operand.
> > >
> > > How can I add an addr32 prefix with operand substitution?  This is
> > > very similar to "%^".  My updated patch will use "%^".
> >
> > Yes, using %^ is what I think would be the optimal solution. Other
> > than that, in your proposed patch, operand-less %_ scans the entire
> > current_output_insn to dig to the UNSPEC_VSIBADDR. You can just use
> > operand substitution, and do e.g. "%X2vgatherpf0..." where 'X'
> > processes operand 2 (vsib_address_operand) and conditionally outputs
> > addr32.
> >
> > BTW: In a new version of the patch, please specify what is changed
> > from the previous version. Otherwise, review of a new version is more
> > or less a guesswork what changed.
> >
>
> Here is the updated patch.  The change is
>
>         return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>
> instead of
>
>         return "%^vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

Did I miss some version of the patch that introduced %^? You used %_
in your previous patch. Did you try with %^?

> We can't use the %X5 since %X5 is used on operands.

So, please introduce some other modifier ("X" was not to be taken
literally, but *some* letter). Why are you overloading 'P'?

I don't know why you are using operand 5 here; you can use operand 2 directly.

Uros.

> I also added a test for -maddress-mode=long.
>
>
> H.J.
> ---
> 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
> when 32-bit indices are used as addresses, like in
>
> vgatherdps %ymm7, 0(,%ymm9,1), %ymm6
>
> 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
> is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
> for x32 if there is no base register nor symbol.
>
> This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with
>
> -Ofast -funroll-loops -march=haswell
>
> gcc/
>
>         PR target/89523
>         * config/i386/i386.c (ix86_print_operand): Handle UNSPEC_VSIBADDR
>         instructions for '%P' to add addr32 prefix if required.
>         * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
>         "%P5" to opcode.
>         (*avx512pf_gatherpf<mode>df_mask): Likewise.
>         (*avx512pf_scatterpf<mode>sf_mask): Likewise.
>         (*avx512pf_scatterpf<mode>df_mask): Likewise.
>         (*avx2_gathersi<mode>): Prepend "%P7" to opcode.
>         (*avx2_gathersi<mode>_2): Prepend "%P6" to opcode.
>         (*avx2_gatherdi<mode>): Prepend "%P7" to opcode.
>         (*avx2_gatherdi<mode>_2): Prepend "%P6" to opcode.
>         (*avx2_gatherdi<mode>_3): Prepend "%P7" to opcode.
>         (*avx2_gatherdi<mode>_4): Prepend "%P6" to opcode.
>         (*avx512f_gathersi<mode>): Prepend "%P6" to opcode.
>         (*avx512f_gathersi<mode>_2): Prepend "%P5" to opcode.
>         (*avx512f_gatherdi<mode>): Prepend "%P6" to opcode.
>         (*avx512f_gatherdi<mode>_2): Prepend "%P5" to opcode.
>         (*avx512f_scattersi<mode>): Likewise.
>         (*avx512f_scatterdi<mode>): Likewise.
>
> gcc/testsuite/
>
>         PR target/89523
>         * gcc.target/i386/pr89523-1a.c: New test.
>         * gcc.target/i386/pr89523-1b.c: Likewise.
>         * gcc.target/i386/pr89523-2.c: Likewise.
>         * gcc.target/i386/pr89523-3.c: Likewise.
>         * gcc.target/i386/pr89523-4.c: Likewise.
>         * gcc.target/i386/pr89523-5.c: Likewise.
>         * gcc.target/i386/pr89523-6.c: Likewise.
>         * gcc.target/i386/pr89523-7.c: Likewise.
>         * gcc.target/i386/pr89523-8.c: Likewise.
>         * gcc.target/i386/pr89523-9.c: Likewise.
> ---
>  gcc/config/i386/i386.c                     | 35 +++++++++++++++-
>  gcc/config/i386/sse.md                     | 46 +++++++++++-----------
>  gcc/testsuite/gcc.target/i386/pr89523-1a.c | 24 +++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-1b.c |  7 ++++
>  gcc/testsuite/gcc.target/i386/pr89523-2.c  | 37 +++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-3.c  | 36 +++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-4.c  | 36 +++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-5.c  | 39 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-6.c  | 38 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-7.c  | 42 ++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-8.c  | 41 +++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr89523-9.c  | 30 ++++++++++++++
>  12 files changed, 386 insertions(+), 25 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index c8f9957163b..ae9befb638d 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -17793,7 +17793,8 @@ print_reg (rtx x, int code, FILE *file)
>     y -- print "st(0)" instead of "st" as a register.
>     d -- print duplicated register operand for AVX instruction.
>     D -- print condition for SSE cmp instruction.
> -   P -- if PIC, print an @PLT suffix.
> +   P -- if PIC, print an @PLT suffix or print addr32 prefix for
> +       TARGET_X32 with UNSPEC_VSIBADDR operand.
>     p -- print raw symbol name.
>     X -- don't print any sort of PIC '@' suffix for a symbol.
>     & -- print some in-use local-dynamic symbol name.
> @@ -18010,6 +18011,37 @@ ix86_print_operand (FILE *file, rtx x, int code)
>           output_operand_lossage ("invalid operand size for operand code 'Z'");
>           return;
>
> +       case 'P':
> +         if (MEM_P (x))
> +           {
> +             x = XEXP (x, 0);
> +             if (GET_CODE (x) == UNSPEC
> +                 && XINT (x, 1) == UNSPEC_VSIBADDR)
> +               {
> +                 if (TARGET_X32)
> +                   {
> +                     /* NB: 32-bit indices in VSIB address are
> +                        sign-extended to 64 bits. In x32, if 32-bit
> +                        address 0xf7fa3010 is sign-extended to
> +                        0xfffffffff7fa3010 which is invalid address.
> +                        Add addr32 prefix if there is no base register
> +                        nor symbol.  */
> +                     bool ok;
> +                     struct ix86_address parts;
> +                     ok = ix86_decompose_address (XVECEXP (x, 0, 0),
> +                                                  &parts);
> +                     gcc_assert (ok && parts.index == NULL_RTX);
> +                     if (parts.base == NULL_RTX
> +                         && (parts.disp == NULL_RTX
> +                             || !symbolic_operand (parts.disp,
> +                                                   GET_MODE (parts.disp))))
> +                       fputs ("addr32 ", file);
> +                   }
> +                 return;
> +               }
> +           }
> +         break;
> +
>         case 'd':
>         case 'b':
>         case 'w':
> @@ -18021,7 +18053,6 @@ ix86_print_operand (FILE *file, rtx x, int code)
>         case 'y':
>         case 'x':
>         case 'X':
> -       case 'P':
>         case 'p':
>         case 'V':
>           break;
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index ac299495b2c..ac500f9cc63 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -17401,9 +17401,9 @@
>      case 3:
>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>          gas changed what it requires incompatibly.  */
> -      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%P5vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>      case 2:
> -      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%P5vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>      default:
>        gcc_unreachable ();
>      }
> @@ -17448,9 +17448,9 @@
>      case 3:
>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>          gas changed what it requires incompatibly.  */
> -      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%P5vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
>      case 2:
> -      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%P5vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
>      default:
>        gcc_unreachable ();
>      }
> @@ -17496,10 +17496,10 @@
>      case 7:
>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>          gas changed what it requires incompatibly.  */
> -      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%P5vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>      case 2:
>      case 6:
> -      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>      default:
>        gcc_unreachable ();
>      }
> @@ -17545,10 +17545,10 @@
>      case 7:
>        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>          gas changed what it requires incompatibly.  */
> -      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%P5vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
>      case 2:
>      case 6:
> -      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> +      return "%P5vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
>      default:
>        gcc_unreachable ();
>      }
> @@ -20292,7 +20292,7 @@
>           UNSPEC_GATHER))
>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
> +  "%P7v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20312,7 +20312,7 @@
>           UNSPEC_GATHER))
>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
> +  "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20353,7 +20353,7 @@
>           UNSPEC_GATHER))
>     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
> +  "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20375,8 +20375,8 @@
>    "TARGET_AVX2"
>  {
>    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
> -    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
> -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
> +    return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
> +  return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
>  }
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
> @@ -20400,7 +20400,7 @@
>                      (const_int 2) (const_int 3)])))
>     (clobber (match_scratch:VI4F_256 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
> +  "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20423,7 +20423,7 @@
>                      (const_int 2) (const_int 3)])))
>     (clobber (match_scratch:VI4F_256 1 "=&x"))]
>    "TARGET_AVX2"
> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
> +  "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "vex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20463,7 +20463,7 @@
>    "TARGET_AVX512F"
>  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
> +  "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20484,7 +20484,7 @@
>    "TARGET_AVX512F"
>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
> +  "%P5v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20525,7 +20525,7 @@
>    "TARGET_AVX512F"
>  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
> +  "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20550,11 +20550,11 @@
>    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
>      {
>        if (<MODE_SIZE> != 64)
> -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
> +       return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
>        else
> -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
> +       return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
>      }
> -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
> +  return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
>  }
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
> @@ -20593,7 +20593,7 @@
>    "TARGET_AVX512F"
>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> +  "%P5v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> @@ -20631,7 +20631,7 @@
>    "TARGET_AVX512F"
>  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
>  ;; gas changed what it requires incompatibly.
> -  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> +  "%P5v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
>    [(set_attr "type" "ssemov")
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1a.c b/gcc/testsuite/gcc.target/i386/pr89523-1a.c
> new file mode 100644
> index 00000000000..0d0edab0363
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-1a.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-maddress-mode=short -mx32 -Ofast -funroll-loops -march=haswell" } */
> +/* { dg-final { scan-assembler-not "\tvgather" } } */
> +/* { dg-final { scan-assembler "addr32 vgather" } } */
> +
> +void foo (void);
> +
> +extern float *ncost;
> +
> +float
> +bar (int type, int num)
> +{
> +  int i;
> +  float cost;
> +
> +  cost = 0;
> +  for (i = 0; i < num; i++)
> +    if (type)
> +      cost += ncost[i];
> +    else
> +      foo ();
> +  return (cost);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1b.c b/gcc/testsuite/gcc.target/i386/pr89523-1b.c
> new file mode 100644
> index 00000000000..6a5c1d43625
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-1b.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-maddress-mode=long -mx32 -Ofast -funroll-loops -march=haswell" } */
> +/* { dg-final { scan-assembler-not "\tvgather" } } */
> +/* { dg-final { scan-assembler "addr32 vgather" } } */
> +
> +#include "pr89523-1a.c"
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c
> new file mode 100644
> index 00000000000..2ffbffe5e40
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -march=haswell" } */
> +/* { dg-final { scan-assembler "\tvgather" } } */
> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> +
> +typedef double __v2df __attribute__ ((__vector_size__ (16)));
> +typedef int __v4si __attribute__ ((__vector_size__ (16)));
> +typedef long long __v2di __attribute__ ((__vector_size__ (16)));
> +
> +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
> +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
> +
> +extern __inline __m128d
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
> +{
> +  __v2df __zero = { 0.0, 0.0 };
> +  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
> +  __v2df x = x;
> +
> +  return (__m128d) __builtin_ia32_gathersiv2df (x,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               __mask,
> +                                               __scale);
> +}
> +
> +__m128d x;
> +double *base;
> +__m128i idx;
> +
> +void extern
> +avx2_test (void)
> +{
> +  x = _mm_i32gather_pd (base, idx, 1);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c
> new file mode 100644
> index 00000000000..fc3631b694b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -march=haswell" } */
> +/* { dg-final { scan-assembler "\tvgather" } } */
> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> +
> +typedef double __v2df __attribute__ ((__vector_size__ (16)));
> +typedef int __v4si __attribute__ ((__vector_size__ (16)));
> +typedef long long __v2di __attribute__ ((__vector_size__ (16)));
> +
> +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
> +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
> +
> +extern __inline __m128d
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
> +{
> +  __v2df __zero = { 0.0, 0.0 };
> +  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
> +
> +  return (__m128d) __builtin_ia32_gatherdiv2df (__zero,
> +                                               __base,
> +                                               (__v2di)__index,
> +                                               __mask,
> +                                               __scale);
> +}
> +
> +__m128d x;
> +double *base;
> +__m128i idx;
> +
> +void extern
> +avx2_test (void)
> +{
> +  x = _mm_i64gather_pd (base, idx, 1);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c
> new file mode 100644
> index 00000000000..3436e5dcae3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -march=haswell" } */
> +/* { dg-final { scan-assembler-not "\tvgather" } } */
> +/* { dg-final { scan-assembler "addr32 vgather" } } */
> +
> +typedef double __v2df __attribute__ ((__vector_size__ (16)));
> +typedef int __v4si __attribute__ ((__vector_size__ (16)));
> +typedef long long __v2di __attribute__ ((__vector_size__ (16)));
> +
> +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
> +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
> +
> +extern __inline __m128d
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
> +{
> +  __v2df __zero = { 0.0, 0.0 };
> +  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
> +  __v2df x = x;
> +
> +  return (__m128d) __builtin_ia32_gathersiv2df (x,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               __mask,
> +                                               __scale);
> +}
> +
> +__m128d x;
> +__m128i idx;
> +
> +void extern
> +avx2_test (void)
> +{
> +  x = _mm_i32gather_pd ((void *) 0, idx, 1);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c
> new file mode 100644
> index 00000000000..6a769c7a249
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512pf" } */
> +/* { dg-final { scan-assembler "\tvgather" } } */
> +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> +
> +typedef int __v8si __attribute__ ((__vector_size__ (32)));
> +typedef long long __m256i __attribute__ ((__vector_size__ (32),
> +                                         __may_alias__));
> +typedef unsigned char  __mmask8;
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
> +                             int __scale, int __hint)
> +{
> +  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
> +                             __scale, __hint);
> +}
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
> +                                  void const *__addr, int __scale, int __hint)
> +{
> +  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
> +                             __hint);
> +}
> +
> +volatile __m256i idx;
> +volatile __mmask8 m8;
> +void *base;
> +
> +void extern
> +avx512pf_test (void)
> +{
> +  _mm512_prefetch_i32gather_pd (idx, base, 8, 3);
> +  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 3);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c
> new file mode 100644
> index 00000000000..82f795e085c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512pf" } */
> +/* { dg-final { scan-assembler-not "\tvgather" } } */
> +/* { dg-final { scan-assembler "addr32 vgather" } } */
> +
> +typedef int __v8si __attribute__ ((__vector_size__ (32)));
> +typedef long long __m256i __attribute__ ((__vector_size__ (32),
> +                                         __may_alias__));
> +typedef unsigned char  __mmask8;
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
> +                             int __scale, int __hint)
> +{
> +  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
> +                             __scale, __hint);
> +}
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
> +                                  void const *__addr, int __scale, int __hint)
> +{
> +  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
> +                             __hint);
> +}
> +
> +volatile __m256i idx;
> +volatile __mmask8 m8;
> +
> +void extern
> +avx512pf_test (void)
> +{
> +  _mm512_prefetch_i32gather_pd (idx, (void *) 0, 8, 3);
> +  _mm512_mask_prefetch_i32gather_pd (idx, m8, (void *) 0, 8, 3);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c
> new file mode 100644
> index 00000000000..030b00d268a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512f" } */
> +/* { dg-final { scan-assembler "\tvscatter" } } */
> +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
> +
> +typedef int __v8si __attribute__ ((__vector_size__ (32)));
> +typedef double __v8df __attribute__ ((__vector_size__ (64)));
> +typedef long long __m256i __attribute__ ((__vector_size__ (32),
> +                                         __may_alias__));
> +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> +typedef unsigned char  __mmask8;
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
> +                     int __scale)
> +{
> +  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
> +                               (__v8si) __index, (__v8df) __v1, __scale);
> +}
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
> +                          __m256i __index, __m512d __v1, int __scale)
> +{
> +  __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
> +                               (__v8df) __v1, __scale);
> +}
> +
> +volatile __m512d src;
> +volatile __m256i idx;
> +volatile __mmask8 m8;
> +double *addr;
> +
> +void extern
> +avx512f_test (void)
> +{
> +  _mm512_i32scatter_pd (addr, idx, src, 8);
> +  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c
> new file mode 100644
> index 00000000000..465c985c2b7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512f" } */
> +/* { dg-final { scan-assembler "\tvscatter" } } */
> +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
> +
> +typedef long long __v8di __attribute__ ((__vector_size__ (64)));
> +typedef double __v8df __attribute__ ((__vector_size__ (64)));
> +typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
> +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> +typedef unsigned char  __mmask8;
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
> +                     int __scale)
> +{
> +  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
> +                               (__v8di) __index, (__v8df) __v1, __scale);
> +}
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
> +                          __m512i __index, __m512d __v1, int __scale)
> +{
> +  __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
> +                               (__v8df) __v1, __scale);
> +}
> +
> +volatile __m512d src;
> +volatile __m512i idx;
> +volatile __mmask8 m8;
> +double *addr;
> +
> +void extern
> +avx512f_test (void)
> +{
> +  _mm512_i64scatter_pd (addr, idx, src, 8);
> +  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c
> new file mode 100644
> index 00000000000..e9323126bd6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-require-effective-target maybe_x32 } */
> +/* { dg-options "-mx32 -O2 -mavx512f" } */
> +/* { dg-final { scan-assembler-not "\tvscatter" } } */
> +/* { dg-final { scan-assembler "addr32 vscatter" } } */
> +
> +typedef int __v8si __attribute__ ((__vector_size__ (32)));
> +typedef double __v8df __attribute__ ((__vector_size__ (64)));
> +typedef long long __m256i __attribute__ ((__vector_size__ (32),
> +                                         __may_alias__));
> +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> +typedef unsigned char  __mmask8;
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
> +                     int __scale)
> +{
> +  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
> +                               (__v8si) __index, (__v8df) __v1, __scale);
> +}
> +
> +volatile __m512d src;
> +volatile __m256i idx;
> +
> +void extern
> +avx512f_test (void)
> +{
> +  _mm512_i32scatter_pd ((void *) 0, idx, src, 8);
> +}
> --
> 2.20.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: V2 [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
  2019-03-04 14:09         ` Uros Bizjak
@ 2019-03-14  8:34           ` H.J. Lu
  2019-03-14  8:47             ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: H.J. Lu @ 2019-03-14  8:34 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 34702 bytes --]

On Mon, Mar 4, 2019 at 10:09 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Mon, Mar 4, 2019 at 2:54 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Sun, Mar 03, 2019 at 10:34:29PM +0100, Uros Bizjak wrote:
> > > On Sun, Mar 3, 2019 at 10:18 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Sun, Mar 3, 2019 at 9:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > > >
> > > > > On Thu, Feb 28, 2019 at 8:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
> > > > > > when 32-bit indices are used as addresses, like in
> > > > > >
> > > > > > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6
> > > > > >
> > > > > > 32-bit indices, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010 which
> > > > > > is invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
> > > > > > for x32 if there is no base register nor symbol.
> > > > > >
> > > > > > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with
> > > > > >
> > > > > > -Ofast -funroll-loops -march=haswell
> > > > >
> > > > > 1. Testcases 2 to 9 fail on fedora-29 with:
> > > > >
> > > > > In file included from /usr/include/features.h:452,
> > > > >                  from /usr/include/bits/libc-header-start.h:33,
> > > > >                  from /usr/include/stdlib.h:25,
> > > > >                  from /ssd/uros/gcc-build-fast/gcc/include/mm_malloc.h:27,
> > > > >                  from /ssd/uros/gcc-build-fast/gcc/include/xmmintrin.h:34,
> > > > >                  from /ssd/uros/gcc-build-fast/gcc/include/immintrin.h:29,
> > > > >                  from
> > > > > /home/uros/gcc-svn/trunk/gcc/testsuite/gcc.target/i386/pr89523-2.c:7:
> > > > > /usr/include/gnu/stubs.h:13:11: fatal error: gnu/stubs-x32.h: No such
> > > > > file or directory
> > > >
> > > > I will update tests to remove  "#include immintrin.h"
> > > >
> > > > > 2. Does the patch work with -maddress-mode={short,long}?
> > > >
> > > > Yes.
> > > >
> > > > > 3. The implementation is wrong. You should use operand substitution
> > > > > with VSIB address as operand, not substitution without operand.
> > > >
> > > > How can I add an addr32 prefix with operand substitution?  This is
> > > > very similar to "%^".  My updated patch will use "%^".
> > >
> > > Yes, using %^ is what I think would be the optimal solution. Other
> > > than that, in your proposed patch, operand-less %_ scans the entire
> > > current_output_insn to dig to the UNSPEC_VSIBADDR. You can just use
> > > operand substitution, and do e.g. "%X2vgatherpf0..." where 'X'
> > > processes operand 2 (vsib_address_operand) and conditionally outputs
> > > addr32.
> > >
> > > BTW: In a new version of the patch, please specify what has changed
> > > from the previous version. Otherwise, reviewing a new version is more
> > > or less guesswork as to what changed.
> > >
> >
> > Here is the updated patch.  The change is
> >
> >         return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >
> > instead of
> >
> >         return "%^vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
>
> Did I miss some version of the patch that introduced %^? You used %_
> in your previous patch. Did you try with %^?

Yes.   It is very similar to

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg02109.html

with

+ case '^':
+   if (TARGET_X32)
+     {
+       subrtx_var_iterator::array_type array;
+       FOR_EACH_SUBRTX_VAR (iter, array,
+    PATTERN (current_output_insn), ALL)
+ {
+   rtx addr = *iter;
+   if (!MEM_P (addr))
+     continue;
+   addr = XEXP (addr, 0);
+   if (GET_CODE (addr) == UNSPEC
+       && XINT (addr, 1) == UNSPEC_VSIBADDR)
+     {
+       /* NB: 32-bit indices in VSIB address are
+ sign-extended to 64 bits.  In x32, a 32-bit
+ address such as 0xf7fa3010 is sign-extended to
+ 0xfffffffff7fa3010, which is an invalid address.
+ Add addr32 prefix if there is neither a base
+ register nor a symbol.  */
+       bool ok;
+       struct ix86_address parts;
+       ok = ix86_decompose_address (XVECEXP (addr, 0, 0),
+    &parts);
+       gcc_assert (ok && parts.index == NULL_RTX);
+       if (parts.base == NULL_RTX
+   && (parts.disp == NULL_RTX
+       || !symbolic_operand (parts.disp,
+     GET_MODE (parts.disp))))
+ fputs ("addr32 ", file);
+       break;
+     }
+ }
+     }

> > We can't use the %X5 since %X5 is used on operands.
>
> So, please introduce some other modifier ("X" was not to be taken
> literally, but *some* letter). Why are you overloading 'P'?

Here is the updated patch with the 'M' modifier.


H.J.


> I don't know why you are using operand 5 here; you can use operand 2 directly.


> Uros.
>
> > I also added a test for -maddress-mode=long.
> >
> >
> > H.J.
> > ---
> > 32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
> > when 32-bit indices are used as addresses, like in
> >
> > vgatherdps %ymm7, 0(,%ymm9,1), %ymm6
> >
> > a 32-bit index, e.g. 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010,
> > which is an invalid address.  Add an addr32 prefix to UNSPEC_VSIBADDR
> > instructions for x32 if there is neither a base register nor a symbol.
> >
> > This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with
> >
> > -Ofast -funroll-loops -march=haswell
> >
> > gcc/
> >
> >         PR target/89523
> >         * config/i386/i386.c (ix86_print_operand): Handle UNSPEC_VSIBADDR
> >         instructions for '%P' to add addr32 prefix if required.
> >         * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
> >         "%P5" to opcode.
> >         (*avx512pf_gatherpf<mode>df_mask): Likewise.
> >         (*avx512pf_scatterpf<mode>sf_mask): Likewise.
> >         (*avx512pf_scatterpf<mode>df_mask): Likewise.
> >         (*avx2_gathersi<mode>): Prepend "%P7" to opcode.
> >         (*avx2_gathersi<mode>_2): Prepend "%P6" to opcode.
> >         (*avx2_gatherdi<mode>): Prepend "%P7" to opcode.
> >         (*avx2_gatherdi<mode>_2): Prepend "%P6" to opcode.
> >         (*avx2_gatherdi<mode>_3): Prepend "%P7" to opcode.
> >         (*avx2_gatherdi<mode>_4): Prepend "%P6" to opcode.
> >         (*avx512f_gathersi<mode>): Prepend "%P5" to opcode.
> >         (*avx512f_gathersi<mode>_2): Prepend "%P6" to opcode.
> >         (*avx512f_gatherdi<mode>): Prepend "%P5" to opcode.
> >         (*avx512f_gatherdi<mode>_2): Likewise.
> >         (*avx512f_scattersi<mode>): Likewise.
> >         (*avx512f_scatterdi<mode>): Likewise.
> >
> > gcc/testsuite/
> >
> >         PR target/89523
> >         * gcc.target/i386/pr89523-1a.c: New test.
> >         * gcc.target/i386/pr89523-1b.c: Likewise.
> >         * gcc.target/i386/pr89523-2.c: Likewise.
> >         * gcc.target/i386/pr89523-3.c: Likewise.
> >         * gcc.target/i386/pr89523-4.c: Likewise.
> >         * gcc.target/i386/pr89523-5.c: Likewise.
> >         * gcc.target/i386/pr89523-6.c: Likewise.
> >         * gcc.target/i386/pr89523-7.c: Likewise.
> >         * gcc.target/i386/pr89523-8.c: Likewise.
> >         * gcc.target/i386/pr89523-9.c: Likewise.
> > ---
> >  gcc/config/i386/i386.c                     | 35 +++++++++++++++-
> >  gcc/config/i386/sse.md                     | 46 +++++++++++-----------
> >  gcc/testsuite/gcc.target/i386/pr89523-1a.c | 24 +++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-1b.c |  7 ++++
> >  gcc/testsuite/gcc.target/i386/pr89523-2.c  | 37 +++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-3.c  | 36 +++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-4.c  | 36 +++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-5.c  | 39 ++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-6.c  | 38 ++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-7.c  | 42 ++++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-8.c  | 41 +++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr89523-9.c  | 30 ++++++++++++++
> >  12 files changed, 386 insertions(+), 25 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1a.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1b.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index c8f9957163b..ae9befb638d 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -17793,7 +17793,8 @@ print_reg (rtx x, int code, FILE *file)
> >     y -- print "st(0)" instead of "st" as a register.
> >     d -- print duplicated register operand for AVX instruction.
> >     D -- print condition for SSE cmp instruction.
> > -   P -- if PIC, print an @PLT suffix.
> > +   P -- if PIC, print an @PLT suffix or print addr32 prefix for
> > +       TARGET_X32 with UNSPEC_VSIBADDR operand.
> >     p -- print raw symbol name.
> >     X -- don't print any sort of PIC '@' suffix for a symbol.
> >     & -- print some in-use local-dynamic symbol name.
> > @@ -18010,6 +18011,37 @@ ix86_print_operand (FILE *file, rtx x, int code)
> >           output_operand_lossage ("invalid operand size for operand code 'Z'");
> >           return;
> >
> > +       case 'P':
> > +         if (MEM_P (x))
> > +           {
> > +             x = XEXP (x, 0);
> > +             if (GET_CODE (x) == UNSPEC
> > +                 && XINT (x, 1) == UNSPEC_VSIBADDR)
> > +               {
> > +                 if (TARGET_X32)
> > +                   {
> > +                     /* NB: 32-bit indices in VSIB address are
> > +                        sign-extended to 64 bits.  In x32, a 32-bit
> > +                        address such as 0xf7fa3010 is sign-extended to
> > +                        0xfffffffff7fa3010, which is an invalid address.
> > +                        Add addr32 prefix if there is neither a base
> > +                        register nor a symbol.  */
> > +                     bool ok;
> > +                     struct ix86_address parts;
> > +                     ok = ix86_decompose_address (XVECEXP (x, 0, 0),
> > +                                                  &parts);
> > +                     gcc_assert (ok && parts.index == NULL_RTX);
> > +                     if (parts.base == NULL_RTX
> > +                         && (parts.disp == NULL_RTX
> > +                             || !symbolic_operand (parts.disp,
> > +                                                   GET_MODE (parts.disp))))
> > +                       fputs ("addr32 ", file);
> > +                   }
> > +                 return;
> > +               }
> > +           }
> > +         break;
> > +
> >         case 'd':
> >         case 'b':
> >         case 'w':
> > @@ -18021,7 +18053,6 @@ ix86_print_operand (FILE *file, rtx x, int code)
> >         case 'y':
> >         case 'x':
> >         case 'X':
> > -       case 'P':
> >         case 'p':
> >         case 'V':
> >           break;
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index ac299495b2c..ac500f9cc63 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -17401,9 +17401,9 @@
> >      case 3:
> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >          gas changed what it requires incompatibly.  */
> > -      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%P5vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >      case 2:
> > -      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%P5vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >      default:
> >        gcc_unreachable ();
> >      }
> > @@ -17448,9 +17448,9 @@
> >      case 3:
> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >          gas changed what it requires incompatibly.  */
> > -      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%P5vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> >      case 2:
> > -      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%P5vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> >      default:
> >        gcc_unreachable ();
> >      }
> > @@ -17496,10 +17496,10 @@
> >      case 7:
> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >          gas changed what it requires incompatibly.  */
> > -      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%P5vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >      case 2:
> >      case 6:
> > -      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%P5vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
> >      default:
> >        gcc_unreachable ();
> >      }
> > @@ -17545,10 +17545,10 @@
> >      case 7:
> >        /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >          gas changed what it requires incompatibly.  */
> > -      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%P5vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> >      case 2:
> >      case 6:
> > -      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> > +      return "%P5vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
> >      default:
> >        gcc_unreachable ();
> >      }
> > @@ -20292,7 +20292,7 @@
> >           UNSPEC_GATHER))
> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
> > +  "%P7v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20312,7 +20312,7 @@
> >           UNSPEC_GATHER))
> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
> > +  "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20353,7 +20353,7 @@
> >           UNSPEC_GATHER))
> >     (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
> > +  "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20375,8 +20375,8 @@
> >    "TARGET_AVX2"
> >  {
> >    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
> > -    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
> > -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
> > +    return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
> > +  return "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
> >  }
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> > @@ -20400,7 +20400,7 @@
> >                      (const_int 2) (const_int 3)])))
> >     (clobber (match_scratch:VI4F_256 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
> > +  "%P7v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20423,7 +20423,7 @@
> >                      (const_int 2) (const_int 3)])))
> >     (clobber (match_scratch:VI4F_256 1 "=&x"))]
> >    "TARGET_AVX2"
> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
> > +  "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "vex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20463,7 +20463,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
> > +  "%P6v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20484,7 +20484,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
> > +  "%P5v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20525,7 +20525,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
> > +  "%P6v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20550,11 +20550,11 @@
> >    if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
> >      {
> >        if (<MODE_SIZE> != 64)
> > -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
> > +       return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
> >        else
> > -       return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
> > +       return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
> >      }
> > -  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
> > +  return "%P5v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
> >  }
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> > @@ -20593,7 +20593,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> > +  "%P5v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > @@ -20631,7 +20631,7 @@
> >    "TARGET_AVX512F"
> >  ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
> >  ;; gas changed what it requires incompatibly.
> > -  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> > +  "%P5v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
> >    [(set_attr "type" "ssemov")
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1a.c b/gcc/testsuite/gcc.target/i386/pr89523-1a.c
> > new file mode 100644
> > index 00000000000..0d0edab0363
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-1a.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-maddress-mode=short -mx32 -Ofast -funroll-loops -march=haswell" } */
> > +/* { dg-final { scan-assembler-not "\tvgather" } } */
> > +/* { dg-final { scan-assembler "addr32 vgather" } } */
> > +
> > +void foo (void);
> > +
> > +extern float *ncost;
> > +
> > +float
> > +bar (int type, int num)
> > +{
> > +  int i;
> > +  float cost;
> > +
> > +  cost = 0;
> > +  for (i = 0; i < num; i++)
> > +    if (type)
> > +      cost += ncost[i];
> > +    else
> > +      foo ();
> > +  return (cost);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1b.c b/gcc/testsuite/gcc.target/i386/pr89523-1b.c
> > new file mode 100644
> > index 00000000000..6a5c1d43625
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-1b.c
> > @@ -0,0 +1,7 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-maddress-mode=long -mx32 -Ofast -funroll-loops -march=haswell" } */
> > +/* { dg-final { scan-assembler-not "\tvgather" } } */
> > +/* { dg-final { scan-assembler "addr32 vgather" } } */
> > +
> > +#include "pr89523-1a.c"
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c
> > new file mode 100644
> > index 00000000000..2ffbffe5e40
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c
> > @@ -0,0 +1,37 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -march=haswell" } */
> > +/* { dg-final { scan-assembler "\tvgather" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> > +
> > +typedef double __v2df __attribute__ ((__vector_size__ (16)));
> > +typedef int __v4si __attribute__ ((__vector_size__ (16)));
> > +typedef long long __v2di __attribute__ ((__vector_size__ (16)));
> > +
> > +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
> > +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
> > +
> > +extern __inline __m128d
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
> > +{
> > +  __v2df __zero = { 0.0, 0.0 };
> > +  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
> > +  __v2df x = x;
> > +
> > +  return (__m128d) __builtin_ia32_gathersiv2df (x,
> > +                                               __base,
> > +                                               (__v4si)__index,
> > +                                               __mask,
> > +                                               __scale);
> > +}
> > +
> > +__m128d x;
> > +double *base;
> > +__m128i idx;
> > +
> > +void extern
> > +avx2_test (void)
> > +{
> > +  x = _mm_i32gather_pd (base, idx, 1);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c
> > new file mode 100644
> > index 00000000000..fc3631b694b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c
> > @@ -0,0 +1,36 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -march=haswell" } */
> > +/* { dg-final { scan-assembler "\tvgather" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> > +
> > +typedef double __v2df __attribute__ ((__vector_size__ (16)));
> > +typedef int __v4si __attribute__ ((__vector_size__ (16)));
> > +typedef long long __v2di __attribute__ ((__vector_size__ (16)));
> > +
> > +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
> > +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
> > +
> > +extern __inline __m128d
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
> > +{
> > +  __v2df __zero = { 0.0, 0.0 };
> > +  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
> > +
> > +  return (__m128d) __builtin_ia32_gatherdiv2df (__zero,
> > +                                               __base,
> > +                                               (__v2di)__index,
> > +                                               __mask,
> > +                                               __scale);
> > +}
> > +
> > +__m128d x;
> > +double *base;
> > +__m128i idx;
> > +
> > +void extern
> > +avx2_test (void)
> > +{
> > +  x = _mm_i64gather_pd (base, idx, 1);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c
> > new file mode 100644
> > index 00000000000..3436e5dcae3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c
> > @@ -0,0 +1,36 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -march=haswell" } */
> > +/* { dg-final { scan-assembler-not "\tvgather" } } */
> > +/* { dg-final { scan-assembler "addr32 vgather" } } */
> > +
> > +typedef double __v2df __attribute__ ((__vector_size__ (16)));
> > +typedef int __v4si __attribute__ ((__vector_size__ (16)));
> > +typedef long long __v2di __attribute__ ((__vector_size__ (16)));
> > +
> > +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
> > +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
> > +
> > +extern __inline __m128d
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
> > +{
> > +  __v2df __zero = { 0.0, 0.0 };
> > +  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
> > +  __v2df x = x;
> > +
> > +  return (__m128d) __builtin_ia32_gathersiv2df (x,
> > +                                               __base,
> > +                                               (__v4si)__index,
> > +                                               __mask,
> > +                                               __scale);
> > +}
> > +
> > +__m128d x;
> > +__m128i idx;
> > +
> > +void extern
> > +avx2_test (void)
> > +{
> > +  x = _mm_i32gather_pd ((void *) 0, idx, 1);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c
> > new file mode 100644
> > index 00000000000..6a769c7a249
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512pf" } */
> > +/* { dg-final { scan-assembler "\tvgather" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vgather" } } */
> > +
> > +typedef int __v8si __attribute__ ((__vector_size__ (32)));
> > +typedef long long __m256i __attribute__ ((__vector_size__ (32),
> > +                                         __may_alias__));
> > +typedef unsigned char  __mmask8;
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
> > +                             int __scale, int __hint)
> > +{
> > +  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
> > +                             __scale, __hint);
> > +}
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
> > +                                  void const *__addr, int __scale, int __hint)
> > +{
> > +  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
> > +                             __hint);
> > +}
> > +
> > +volatile __m256i idx;
> > +volatile __mmask8 m8;
> > +void *base;
> > +
> > +void extern
> > +avx512pf_test (void)
> > +{
> > +  _mm512_prefetch_i32gather_pd (idx, base, 8, 3);
> > +  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 3);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c
> > new file mode 100644
> > index 00000000000..82f795e085c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c
> > @@ -0,0 +1,38 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512pf" } */
> > +/* { dg-final { scan-assembler-not "\tvgather" } } */
> > +/* { dg-final { scan-assembler "addr32 vgather" } } */
> > +
> > +typedef int __v8si __attribute__ ((__vector_size__ (32)));
> > +typedef long long __m256i __attribute__ ((__vector_size__ (32),
> > +                                         __may_alias__));
> > +typedef unsigned char  __mmask8;
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
> > +                             int __scale, int __hint)
> > +{
> > +  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
> > +                             __scale, __hint);
> > +}
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
> > +                                  void const *__addr, int __scale, int __hint)
> > +{
> > +  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
> > +                             __hint);
> > +}
> > +
> > +volatile __m256i idx;
> > +volatile __mmask8 m8;
> > +
> > +void extern
> > +avx512pf_test (void)
> > +{
> > +  _mm512_prefetch_i32gather_pd (idx, (void *) 0, 8, 3);
> > +  _mm512_mask_prefetch_i32gather_pd (idx, m8, (void *) 0, 8, 3);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c
> > new file mode 100644
> > index 00000000000..030b00d268a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c
> > @@ -0,0 +1,42 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512f" } */
> > +/* { dg-final { scan-assembler "\tvscatter" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
> > +
> > +typedef int __v8si __attribute__ ((__vector_size__ (32)));
> > +typedef double __v8df __attribute__ ((__vector_size__ (64)));
> > +typedef long long __m256i __attribute__ ((__vector_size__ (32),
> > +                                         __may_alias__));
> > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> > +typedef unsigned char  __mmask8;
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
> > +                     int __scale)
> > +{
> > +  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
> > +                               (__v8si) __index, (__v8df) __v1, __scale);
> > +}
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
> > +                          __m256i __index, __m512d __v1, int __scale)
> > +{
> > +  __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
> > +                               (__v8df) __v1, __scale);
> > +}
> > +
> > +volatile __m512d src;
> > +volatile __m256i idx;
> > +volatile __mmask8 m8;
> > +double *addr;
> > +
> > +void extern
> > +avx512f_test (void)
> > +{
> > +  _mm512_i32scatter_pd (addr, idx, src, 8);
> > +  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c
> > new file mode 100644
> > index 00000000000..465c985c2b7
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c
> > @@ -0,0 +1,41 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512f" } */
> > +/* { dg-final { scan-assembler "\tvscatter" } } */
> > +/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
> > +
> > +typedef long long __v8di __attribute__ ((__vector_size__ (64)));
> > +typedef double __v8df __attribute__ ((__vector_size__ (64)));
> > +typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
> > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> > +typedef unsigned char  __mmask8;
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
> > +                     int __scale)
> > +{
> > +  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
> > +                               (__v8di) __index, (__v8df) __v1, __scale);
> > +}
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
> > +                          __m512i __index, __m512d __v1, int __scale)
> > +{
> > +  __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
> > +                               (__v8df) __v1, __scale);
> > +}
> > +
> > +volatile __m512d src;
> > +volatile __m512i idx;
> > +volatile __mmask8 m8;
> > +double *addr;
> > +
> > +void extern
> > +avx512f_test (void)
> > +{
> > +  _mm512_i64scatter_pd (addr, idx, src, 8);
> > +  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c
> > new file mode 100644
> > index 00000000000..e9323126bd6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c
> > @@ -0,0 +1,30 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-require-effective-target maybe_x32 } */
> > +/* { dg-options "-mx32 -O2 -mavx512f" } */
> > +/* { dg-final { scan-assembler-not "\tvscatter" } } */
> > +/* { dg-final { scan-assembler "addr32 vscatter" } } */
> > +
> > +typedef int __v8si __attribute__ ((__vector_size__ (32)));
> > +typedef double __v8df __attribute__ ((__vector_size__ (64)));
> > +typedef long long __m256i __attribute__ ((__vector_size__ (32),
> > +                                         __may_alias__));
> > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
> > +typedef unsigned char  __mmask8;
> > +
> > +extern __inline void
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
> > +                     int __scale)
> > +{
> > +  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
> > +                               (__v8si) __index, (__v8df) __v1, __scale);
> > +}
> > +
> > +volatile __m512d src;
> > +volatile __m256i idx;
> > +
> > +void extern
> > +avx512f_test (void)
> > +{
> > +  _mm512_i32scatter_pd ((void *) 0, idx, src, 8);
> > +}
> > --
> > 2.20.1
> >



--
H.J.

[-- Attachment #2: 0001-x32-Add-addr32-prefix-to-VSIB-address.patch --]
[-- Type: text/x-patch, Size: 25088 bytes --]

From 5533e852483b8fbebab6da2cff7c36b8b51e227e Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 27 Feb 2019 15:46:51 -0800
Subject: [PATCH] x32: Add addr32 prefix to VSIB address

32-bit indices in VSIB address are sign-extended to 64 bits.  In x32,
when 32-bit indices are used as addresses, like in

vgatherdps %ymm7, 0(,%ymm9,1), %ymm6

a 32-bit index, 0xf7fa3010, is sign-extended to 0xfffffffff7fa3010, which
is an invalid address.  Add addr32 prefix to UNSPEC_VSIBADDR instructions
for x32 if there is neither a base register nor a symbol.

This fixes 175.vpr and 254.gap in SPEC CPU 2000 on x32 with

-Ofast -funroll-loops -march=haswell

gcc/

	PR target/89523
	* config/i386/i386.c (ix86_print_operand): Handle 'M' to add
	addr32 prefix to VSIB address for X32.
	* config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
	"%M2" to opcode.
	(*avx512pf_gatherpf<mode>df_mask): Likewise.
	(*avx512pf_scatterpf<mode>sf_mask): Likewise.
	(*avx512pf_scatterpf<mode>df_mask): Likewise.
	(*avx2_gathersi<mode>): Prepend "%M3" to opcode.
	(*avx2_gathersi<mode>_2): Prepend "%M2" to opcode.
	(*avx2_gatherdi<mode>): Prepend "%M3" to opcode.
	(*avx2_gatherdi<mode>_2): Prepend "%M2" to opcode.
	(*avx2_gatherdi<mode>_3): Prepend "%M3" to opcode.
	(*avx2_gatherdi<mode>_4): Prepend "%M2" to opcode.
	(*avx512f_gathersi<mode>): Prepend "%M4" to opcode.
	(*avx512f_gathersi<mode>_2): Prepend "%M3" to opcode.
	(*avx512f_gatherdi<mode>): Prepend "%M4" to opcode.
	(*avx512f_gatherdi<mode>_2): Prepend "%M3" to opcode.
	(*avx512f_scattersi<mode>): Prepend "%M0" to opcode.
	(*avx512f_scatterdi<mode>): Likewise.

gcc/testsuite/

	PR target/89523
	* gcc.target/i386/pr89523-1a.c: New test.
	* gcc.target/i386/pr89523-1b.c: Likewise.
	* gcc.target/i386/pr89523-2.c: Likewise.
	* gcc.target/i386/pr89523-3.c: Likewise.
	* gcc.target/i386/pr89523-4.c: Likewise.
	* gcc.target/i386/pr89523-5.c: Likewise.
	* gcc.target/i386/pr89523-6.c: Likewise.
	* gcc.target/i386/pr89523-7.c: Likewise.
	* gcc.target/i386/pr89523-8.c: Likewise.
	* gcc.target/i386/pr89523-9.c: Likewise.
---
 gcc/config/i386/i386.c                     | 21 ++++++++++
 gcc/config/i386/sse.md                     | 46 +++++++++++-----------
 gcc/testsuite/gcc.target/i386/pr89523-1a.c | 24 +++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-1b.c |  7 ++++
 gcc/testsuite/gcc.target/i386/pr89523-2.c  | 37 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-3.c  | 36 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-4.c  | 36 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-5.c  | 39 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-6.c  | 38 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-7.c  | 42 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-8.c  | 41 +++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89523-9.c  | 30 ++++++++++++++
 12 files changed, 374 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89523-9.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index fe459071aaf..1f94a45909d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17813,6 +17813,7 @@ print_reg (rtx x, int code, FILE *file)
    ; -- print a semicolon (after prefixes due to bug in older gas).
    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
    ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
+   M -- print addr32 prefix for TARGET_X32 with VSIB address.
    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
  */
 
@@ -18360,6 +18361,26 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	  putc (TARGET_AVX2 ? 'i' : 'f', file);
 	  return;
 
+	case 'M':
+	  if (TARGET_X32)
+	    {
+	      /* NB: 32-bit indices in VSIB address are sign-extended
+		 to 64 bits.  In x32, a 32-bit address such as 0xf7fa3010
+		 is sign-extended to 0xfffffffff7fa3010, which is an
+		 invalid address.  Add addr32 prefix if there is neither
+		 a base register nor a symbol.  */
+	      bool ok;
+	      struct ix86_address parts;
+	      ok = ix86_decompose_address (x, &parts);
+	      gcc_assert (ok && parts.index == NULL_RTX);
+	      if (parts.base == NULL_RTX
+		  && (parts.disp == NULL_RTX
+		      || !symbolic_operand (parts.disp,
+					    GET_MODE (parts.disp))))
+		fputs ("addr32 ", file);
+	    }
+	  return;
+
 	case '^':
 	  if (TARGET_64BIT && Pmode != word_mode)
 	    fputs ("addr32 ", file);
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d4c01407f4a..86bf8661808 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17486,9 +17486,9 @@
     case 3:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17533,9 +17533,9 @@
     case 3:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17581,10 +17581,10 @@
     case 7:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
+      return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -17630,10 +17630,10 @@
     case 7:
       /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 	 gas changed what it requires incompatibly.  */
-      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
+      return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -20377,7 +20377,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
+  "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20397,7 +20397,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
+  "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20438,7 +20438,7 @@
 	  UNSPEC_GATHER))
    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
+  "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20460,8 +20460,8 @@
   "TARGET_AVX2"
 {
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
-    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
+    return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
+  return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
@@ -20485,7 +20485,7 @@
 		     (const_int 2) (const_int 3)])))
    (clobber (match_scratch:VI4F_256 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
+  "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20508,7 +20508,7 @@
 		     (const_int 2) (const_int 3)])))
    (clobber (match_scratch:VI4F_256 1 "=&x"))]
   "TARGET_AVX2"
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
+  "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20548,7 +20548,7 @@
   "TARGET_AVX512F"
 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
+  "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20569,7 +20569,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
+  "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20610,7 +20610,7 @@
   "TARGET_AVX512F"
 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
+  "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20635,11 +20635,11 @@
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
     {
       if (<MODE_SIZE> != 64)
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
+	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
       else
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
+	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
     }
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
+  return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
@@ -20678,7 +20678,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
+  "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -20716,7 +20716,7 @@
   "TARGET_AVX512F"
 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
 ;; gas changed what it requires incompatibly.
-  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
+  "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1a.c b/gcc/testsuite/gcc.target/i386/pr89523-1a.c
new file mode 100644
index 00000000000..0d0edab0363
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-1a.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-maddress-mode=short -mx32 -Ofast -funroll-loops -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+void foo (void);
+
+extern float *ncost;
+
+float
+bar (int type, int num)
+{
+  int i;
+  float cost;
+
+  cost = 0;
+  for (i = 0; i < num; i++)
+    if (type)
+      cost += ncost[i];
+    else
+      foo ();
+  return (cost);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-1b.c b/gcc/testsuite/gcc.target/i386/pr89523-1b.c
new file mode 100644
index 00000000000..6a5c1d43625
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-1b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-maddress-mode=long -mx32 -Ofast -funroll-loops -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+#include "pr89523-1a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-2.c b/gcc/testsuite/gcc.target/i386/pr89523-2.c
new file mode 100644
index 00000000000..2ffbffe5e40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v2df __zero = { 0.0, 0.0 };
+  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
+  __v2df x = x;
+
+  return (__m128d) __builtin_ia32_gathersiv2df (x,
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
+}
+
+__m128d x;
+double *base;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i32gather_pd (base, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-3.c b/gcc/testsuite/gcc.target/i386/pr89523-3.c
new file mode 100644
index 00000000000..fc3631b694b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v2df __zero = { 0.0, 0.0 };
+  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
+
+  return (__m128d) __builtin_ia32_gatherdiv2df (__zero,
+						__base,
+						(__v2di)__index,
+						__mask,
+						__scale);
+}
+
+__m128d x;
+double *base;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i64gather_pd (base, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-4.c b/gcc/testsuite/gcc.target/i386/pr89523-4.c
new file mode 100644
index 00000000000..3436e5dcae3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-4.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
+{
+  __v2df __zero = { 0.0, 0.0 };
+  __v2df __mask = __builtin_ia32_cmpeqpd (__zero, __zero);
+  __v2df x = x;
+
+  return (__m128d) __builtin_ia32_gathersiv2df (x,
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
+}
+
+__m128d x;
+__m128i idx;
+
+void extern
+avx2_test (void)
+{
+  x = _mm_i32gather_pd ((void *) 0, idx, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c
new file mode 100644
index 00000000000..6a769c7a249
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512pf" } */
+/* { dg-final { scan-assembler "\tvgather" } } */
+/* { dg-final { scan-assembler-not "addr32 vgather" } } */
+
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+					  __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
+			      int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
+			      __scale, __hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
+				   void const *__addr, int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
+			      __hint);
+}
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32gather_pd (idx, base, 8, 3);
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c
new file mode 100644
index 00000000000..82f795e085c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512pf" } */
+/* { dg-final { scan-assembler-not "\tvgather" } } */
+/* { dg-final { scan-assembler "addr32 vgather" } } */
+
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+					  __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
+			      int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
+			      __scale, __hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
+				   void const *__addr, int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
+			      __hint);
+}
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32gather_pd (idx, (void *) 0, 8, 3);
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, (void *) 0, 8, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-7.c b/gcc/testsuite/gcc.target/i386/pr89523-7.c
new file mode 100644
index 00000000000..030b00d268a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-7.c
@@ -0,0 +1,42 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler "\tvscatter" } } */
+/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
+
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+					  __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
+		      int __scale)
+{
+  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
+				(__v8si) __index, (__v8df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
+			   __m256i __index, __m512d __v1, int __scale)
+{
+  __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
+				(__v8df) __v1, __scale);
+}
+
+volatile __m512d src;
+volatile __m256i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i32scatter_pd (addr, idx, src, 8);
+  _mm512_mask_i32scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-8.c b/gcc/testsuite/gcc.target/i386/pr89523-8.c
new file mode 100644
index 00000000000..465c985c2b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-8.c
@@ -0,0 +1,41 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler "\tvscatter" } } */
+/* { dg-final { scan-assembler-not "addr32 vscatter" } } */
+
+typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
+		      int __scale)
+{
+  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
+				(__v8di) __index, (__v8df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
+			   __m512i __index, __m512d __v1, int __scale)
+{
+  __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
+				(__v8df) __v1, __scale);
+}
+
+volatile __m512d src;
+volatile __m512i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i64scatter_pd (addr, idx, src, 8);
+  _mm512_mask_i64scatter_pd (addr, m8, idx, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89523-9.c b/gcc/testsuite/gcc.target/i386/pr89523-9.c
new file mode 100644
index 00000000000..e9323126bd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89523-9.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-mx32 -O2 -mavx512f" } */
+/* { dg-final { scan-assembler-not "\tvscatter" } } */
+/* { dg-final { scan-assembler "addr32 vscatter" } } */
+
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef double __v8df __attribute__ ((__vector_size__ (64)));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+					  __may_alias__));
+typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef unsigned char  __mmask8;
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
+		      int __scale)
+{
+  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
+				(__v8si) __index, (__v8df) __v1, __scale);
+}
+
+volatile __m512d src;
+volatile __m256i idx;
+
+void extern
+avx512f_test (void)
+{
+  _mm512_i32scatter_pd ((void *) 0, idx, src, 8);
+}
-- 
2.20.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: V2 [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions
  2019-03-14  8:34           ` H.J. Lu
@ 2019-03-14  8:47             ` Uros Bizjak
  0 siblings, 0 replies; 8+ messages in thread
From: Uros Bizjak @ 2019-03-14  8:47 UTC (permalink / raw)
  To: gcc-patches; +Cc: H.J. Lu

On Thu, Mar 14, 2019 at 9:22 AM H.J. Lu <hjl.tools@gmail.com> wrote:

> > > We can't use the %X5 since %X5 is used on operands.
> >
> > So, please introduce some other modifier ("X" was not to be taken
> > literally, but *some* letter). Why are you overloading 'P'?
>
> Here is the updated patch with the 'M' modifier.
>
>
> gcc/
>
> PR target/89523
> * config/i386/i386.c (ix86_print_operand): Handle 'M' to add
> addr32 prefix to VSIB address for X32.
> * config/i386/sse.md (*avx512pf_gatherpf<mode>sf_mask): Prepend
> "%M2" to opcode.
> (*avx512pf_gatherpf<mode>df_mask): Likewise.
> (*avx512pf_scatterpf<mode>sf_mask): Likewise.
> (*avx512pf_scatterpf<mode>df_mask): Likewise.
> (*avx2_gathersi<mode>): Prepend "%M3" to opcode.
> (*avx2_gathersi<mode>_2): Prepend "%M2" to opcode.
> (*avx2_gatherdi<mode>): Prepend "%M3" to opcode.
> (*avx2_gatherdi<mode>_2): Prepend "%M2" to opcode.
> (*avx2_gatherdi<mode>_3): Prepend "%M3" to opcode.
> (*avx2_gatherdi<mode>_4): Prepend "%M2" to opcode.
> (*avx512f_gathersi<mode>): Prepend "%M4" to opcode.
> (*avx512f_gathersi<mode>_2): Prepend "%M3" to opcode.
> (*avx512f_gatherdi<mode>): Prepend "%M4" to opcode.
> (*avx512f_gatherdi<mode>_2): Prepend "%M3" to opcode.
> (*avx512f_scattersi<mode>): Prepend "%M0" to opcode.
> (*avx512f_scatterdi<mode>): Likewise.
>
> gcc/testsuite/
>
> PR target/89523
> * gcc.target/i386/pr89523-1a.c: New test.
> * gcc.target/i386/pr89523-1b.c: Likewise.
> * gcc.target/i386/pr89523-2.c: Likewise.
> * gcc.target/i386/pr89523-3.c: Likewise.
> * gcc.target/i386/pr89523-4.c: Likewise.
> * gcc.target/i386/pr89523-5.c: Likewise.
> * gcc.target/i386/pr89523-6.c: Likewise.
> * gcc.target/i386/pr89523-7.c: Likewise.
> * gcc.target/i386/pr89523-8.c: Likewise.
> * gcc.target/i386/pr89523-9.c: Likewise.

The patch looks safe and relatively non-intrusive, so OK for gcc-9 and
backports.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-03-14  8:45 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-28 19:22 [PATCH] x32: Add addr32 prefix to UNSPEC_VSIBADDR instructions H.J. Lu
2019-03-03 17:27 ` Uros Bizjak
2019-03-03 21:18   ` H.J. Lu
2019-03-03 21:34     ` Uros Bizjak
2019-03-04 13:54       ` V2 " H.J. Lu
2019-03-04 14:09         ` Uros Bizjak
2019-03-14  8:34           ` H.J. Lu
2019-03-14  8:47             ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).