public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 6/6] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV
  2020-02-29 14:16 V2 [PATCH 0/6] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move H.J. Lu
                   ` (3 preceding siblings ...)
  2020-02-29 14:16 ` [PATCH 5/6] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV H.J. Lu
@ 2020-02-29 14:16 ` H.J. Lu
  2020-03-12  3:53   ` Jeff Law
  2020-02-29 15:30 ` [PATCH 3/6] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV H.J. Lu
  5 siblings, 1 reply; 16+ messages in thread
From: H.J. Lu @ 2020-02-29 14:16 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Jeffrey Law, Jan Hubicka, Uros Bizjak

There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

Remove ext_sse_reg_operand since it is no longer needed.

	PR target/89229
	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_V1DF and
	MODE_V2SF.
	* config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Call
	ix86_output_ssemov for TYPE_SSEMOV.  Remove ext_sse_reg_operand
	check.
	* config/i386/predicates.md (ext_sse_reg_operand): Remove.
---
 gcc/config/i386/i386.c        | 10 ++++++++++
 gcc/config/i386/mmx.md        | 29 ++---------------------------
 gcc/config/i386/predicates.md |  5 -----
 3 files changed, 12 insertions(+), 32 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1d3b784532b..f34a708cdc3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5142,6 +5142,16 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
       else
 	return "%vmovss\t{%1, %0|%0, %1}";
 
+    case MODE_V1DF:
+      gcc_assert (!TARGET_AVX);
+       return "movlpd\t{%1, %0|%0, %1}";
+
+    case MODE_V2SF:
+      if (TARGET_AVX && REG_P (operands[0]))
+	return "vmovlps\t{%1, %d0|%d0, %1}";
+      else
+	return "%vmovlps\t{%1, %0|%0, %1}";
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e1c8b0af4c7..c3f195bb34a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -118,29 +118,7 @@ (define_insn "*mov<mode>_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_DI:
-	  /* Handle broken assemblers that require movd instead of movq.  */
-	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
-	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-	    return "%vmovd\t{%1, %0|%0, %1}";
-	  return "%vmovq\t{%1, %0|%0, %1}";
-	case MODE_TI:
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_XI:
-	  return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
-
-	case MODE_V2SF:
-	  if (TARGET_AVX && REG_P (operands[0]))
-	    return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
-	  return "%vmovlps\t{%1, %0|%0, %1}";
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -189,10 +167,7 @@ (define_insn "*mov<mode>_internal"
      (cond [(eq_attr "alternative" "2")
 	      (const_string "SI")
 	    (eq_attr "alternative" "11,12")
-	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
-			  (match_operand 1 "ext_sse_reg_operand"))
-			(const_string "XI")
-		     (match_test "<MODE>mode == V2SFmode")
+	      (cond [(match_test "<MODE>mode == V2SFmode")
 		       (const_string "V4SF")
 		     (ior (not (match_test "TARGET_SSE2"))
 			  (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 1119366d54e..71f4cb1193c 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -61,11 +61,6 @@ (define_predicate "sse_reg_operand"
   (and (match_code "reg")
        (match_test "SSE_REGNO_P (REGNO (op))")))
 
-;; True if the operand is an AVX-512 new register.
-(define_predicate "ext_sse_reg_operand"
-  (and (match_code "reg")
-       (match_test "EXT_REX_SSE_REGNO_P (REGNO (op))")))
-
 ;; Return true if op is a QImode register.
 (define_predicate "any_QIreg_operand"
   (and (match_code "reg")
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* V2 [PATCH 0/6] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move
@ 2020-02-29 14:16 H.J. Lu
  2020-02-29 14:16 ` [PATCH 1/6] i386: Properly encode vector registers in " H.J. Lu
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: H.J. Lu @ 2020-02-29 14:16 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Jeffrey Law, Jan Hubicka, Uros Bizjak

This patch set was originally submitted in Feb 2019:

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01841.html

I broke it into 6 smaller patches for easy review.

On x86, when AVX and AVX512 are enabled, vector move instructions can
be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):

   0:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
   4:	62 f1 fd 08 6f d1    	vmovdqa64 %xmm1,%xmm2

We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
and 256-bit vector moves.  xmm16-xmm31 and ymm16-ymm31 are disallowed in
128-bit and 256-bit modes when AVX512VL is disabled.  Mode attributes on
x86 vector move patterns indicate target preferences of vector move
encoding.  For a scalar register-to-register move, we can use a 512-bit
vector move instruction to move a 32-bit/64-bit scalar if AVX512VL isn't
available.  With AVX512F and AVX512VL, we should use VEX encoding for
128-bit/256-bit vector moves if the upper 16 vector registers aren't used.
This patch adds a function, ix86_output_ssemov, to generate vector moves:

1. If zmm registers are used, use EVEX encoding.
2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
will be generated.
3. If xmm16-xmm31/ymm16-ymm31 registers are used:
   a. With AVX512VL, AVX512VL vector moves will be generated.
   b. Without AVX512VL, an xmm16-xmm31/ymm16-ymm31 register-to-register
      move will be done with a zmm register move.

There is no need to set mode attribute to XImode explicitly since
ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers
with and without AVX512VL.

Tested on AVX2 and AVX512 with and without --with-arch=native.

H.J. Lu (6):
  i386: Properly encode vector registers in vector move
  i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV

 gcc/config/i386/i386-protos.h                 |   2 +
 gcc/config/i386/i386.c                        | 242 ++++++++++++++++++
 gcc/config/i386/i386.md                       | 212 +--------------
 gcc/config/i386/mmx.md                        |  29 +--
 gcc/config/i386/predicates.md                 |   5 -
 gcc/config/i386/sse.md                        |  98 +------
 .../gcc.target/i386/avx512vl-vmovdqa64-1.c    |   7 +-
 gcc/testsuite/gcc.target/i386/pr89229-2a.c    |  15 ++
 gcc/testsuite/gcc.target/i386/pr89229-2b.c    |  13 +
 gcc/testsuite/gcc.target/i386/pr89229-2c.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89229-3a.c    |  16 ++
 gcc/testsuite/gcc.target/i386/pr89229-3b.c    |  12 +
 gcc/testsuite/gcc.target/i386/pr89229-3c.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89229-4a.c    |  17 ++
 gcc/testsuite/gcc.target/i386/pr89229-4b.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89229-4c.c    |   7 +
 gcc/testsuite/gcc.target/i386/pr89229-5a.c    |  17 ++
 gcc/testsuite/gcc.target/i386/pr89229-5b.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89229-5c.c    |   7 +
 gcc/testsuite/gcc.target/i386/pr89229-6a.c    |  16 ++
 gcc/testsuite/gcc.target/i386/pr89229-6b.c    |   7 +
 gcc/testsuite/gcc.target/i386/pr89229-6c.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89229-7a.c    |  16 ++
 gcc/testsuite/gcc.target/i386/pr89229-7b.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89229-7c.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89346.c       |  15 ++
 26 files changed, 465 insertions(+), 330 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89346.c

-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 4/6] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV
  2020-02-29 14:16 V2 [PATCH 0/6] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move H.J. Lu
  2020-02-29 14:16 ` [PATCH 1/6] i386: Properly encode vector registers in " H.J. Lu
@ 2020-02-29 14:16 ` H.J. Lu
  2020-03-12  3:41   ` Jeff Law
  2020-02-29 14:16 ` [PATCH 2/6] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV H.J. Lu
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: H.J. Lu @ 2020-02-29 14:16 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Jeffrey Law, Jan Hubicka, Uros Bizjak

There is no need to set the mode attribute to XImode or V8DFmode since
ix86_output_ssemov can properly encode xmm16-xmm31 registers with and
without AVX512VL.

gcc/

	PR target/89229
	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_DF.
	* config/i386/i386.md (*movdf_internal): Call ix86_output_ssemov
	for TYPE_SSEMOV.  Remove TARGET_AVX512F, TARGET_PREFER_AVX256,
	TARGET_AVX512VL and ext_sse_reg_operand check.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-6a.c: New test.
	* gcc.target/i386/pr89229-6b.c: Likewise.
	* gcc.target/i386/pr89229-6c.c: Likewise.
---
 gcc/config/i386/i386.c                     |  6 +++
 gcc/config/i386/i386.md                    | 44 ++--------------------
 gcc/testsuite/gcc.target/i386/pr89229-6a.c | 16 ++++++++
 gcc/testsuite/gcc.target/i386/pr89229-6b.c |  7 ++++
 gcc/testsuite/gcc.target/i386/pr89229-6c.c |  6 +++
 5 files changed, 38 insertions(+), 41 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6c.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c28c162282a..a6fe9894ab8 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5130,6 +5130,12 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
     case MODE_SI:
       return "%vmovd\t{%1, %0|%0, %1}";
 
+    case MODE_DF:
+      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
+	return "vmovsd\t{%d1, %0|%0, %d1}";
+      else
+	return "%vmovsd\t{%1, %0|%0, %1}";
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e9537fadfe8..060a34c4bd4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3307,37 +3307,7 @@ (define_insn "*movdf_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_DF:
-	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-	    return "vmovsd\t{%d1, %0|%0, %d1}";
-	  return "%vmovsd\t{%1, %0|%0, %1}";
-
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-	case MODE_V8DF:
-	  return "vmovapd\t{%g1, %g0|%g0, %g1}";
-	case MODE_V2DF:
-	  return "%vmovapd\t{%1, %0|%0, %1}";
-
-	case MODE_V2SF:
-	  gcc_assert (!TARGET_AVX);
-	  return "movlps\t{%1, %0|%0, %1}";
-	case MODE_V1DF:
-	  gcc_assert (!TARGET_AVX);
-	  return "movlpd\t{%1, %0|%0, %1}";
-
-	case MODE_DI:
-	  /* Handle broken assemblers that require movd instead of movq.  */
-	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
-	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-	    return "%vmovd\t{%1, %0|%0, %1}";
-	  return "%vmovq\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -3391,10 +3361,7 @@ (define_insn "*movdf_internal"
 
 	       /* xorps is one byte shorter for non-AVX targets.  */
 	       (eq_attr "alternative" "12,16")
-		 (cond [(and (match_test "TARGET_AVX512F")
-			     (not (match_test "TARGET_PREFER_AVX256")))
-			  (const_string "XI")
-			(match_test "TARGET_AVX")
+		 (cond [(match_test "TARGET_AVX")
 			  (const_string "V2DF")
 			(ior (not (match_test "TARGET_SSE2"))
 			     (match_test "optimize_function_for_size_p (cfun)"))
@@ -3410,12 +3377,7 @@ (define_insn "*movdf_internal"
 
 	       /* movaps is one byte shorter for non-AVX targets.  */
 	       (eq_attr "alternative" "13,17")
-		 (cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
-				  (not (match_test "TARGET_AVX512VL")))
-			     (ior (match_operand 0 "ext_sse_reg_operand")
-				  (match_operand 1 "ext_sse_reg_operand")))
-			  (const_string "V8DF")
-			(match_test "TARGET_AVX")
+		 (cond [(match_test "TARGET_AVX")
 			  (const_string "DF")
 			(ior (not (match_test "TARGET_SSE2"))
 			     (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6a.c b/gcc/testsuite/gcc.target/i386/pr89229-6a.c
new file mode 100644
index 00000000000..5bc10d25619
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-6a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern double d;
+
+void
+foo1 (double x)
+{
+  register double xmm16 __asm ("xmm16") = x;
+  asm volatile ("" : "+v" (xmm16));
+  register double xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "vmovapd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6b.c b/gcc/testsuite/gcc.target/i386/pr89229-6b.c
new file mode 100644
index 00000000000..b248a3726f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-6b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-6a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
+/* { dg-final { scan-assembler-not "vmovapd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6c.c b/gcc/testsuite/gcc.target/i386/pr89229-6c.c
new file mode 100644
index 00000000000..7a4d254670c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-6c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-6a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 2/6] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV
  2020-02-29 14:16 V2 [PATCH 0/6] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move H.J. Lu
  2020-02-29 14:16 ` [PATCH 1/6] i386: Properly encode vector registers in " H.J. Lu
  2020-02-29 14:16 ` [PATCH 4/6] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV H.J. Lu
@ 2020-02-29 14:16 ` H.J. Lu
  2020-03-12  3:32   ` Jeff Law
  2020-02-29 14:16 ` [PATCH 5/6] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV H.J. Lu
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: H.J. Lu @ 2020-02-29 14:16 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Jeffrey Law, Jan Hubicka, Uros Bizjak

There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

	PR target/89229
	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_DI.
	* config/i386/i386.md (*movdi_internal): Call ix86_output_ssemov
	for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
	check.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-4a.c: New test.
	* gcc.target/i386/pr89229-4b.c: Likewise.
	* gcc.target/i386/pr89229-4c.c: Likewise.
---
 gcc/config/i386/i386.c                     |  9 +++++++
 gcc/config/i386/i386.md                    | 31 ++--------------------
 gcc/testsuite/gcc.target/i386/pr89229-4a.c | 17 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr89229-4b.c |  6 +++++
 gcc/testsuite/gcc.target/i386/pr89229-4c.c |  7 +++++
 5 files changed, 41 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4c.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7bbfbb4c5a7..baf70a64193 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5118,6 +5118,15 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
     case MODE_V4SF:
       return ix86_get_ssemov (operands, 16, insn_mode, mode);
 
+    case MODE_DI:
+      /* Handle broken assemblers that require movd instead of movq. */
+      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
+	  && (GENERAL_REG_P (operands[0])
+	      || GENERAL_REG_P (operands[1])))
+	return "%vmovd\t{%1, %0|%0, %1}";
+      else
+	return "%vmovq\t{%1, %0|%0, %1}";
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index cea831b6086..d8462b3de37 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2054,31 +2054,7 @@ (define_insn "*movdi_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_DI:
-	  /* Handle broken assemblers that require movd instead of movq.  */
-	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
-	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-	    return "%vmovd\t{%1, %0|%0, %1}";
-	  return "%vmovq\t{%1, %0|%0, %1}";
-
-	case MODE_TI:
-	  /* Handle AVX512 registers set.  */
-	  if (EXT_REX_SSE_REG_P (operands[0])
-	      || EXT_REX_SSE_REG_P (operands[1]))
-	    return "vmovdqa64\t{%1, %0|%0, %1}";
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
-
-	case MODE_V2SF:
-	  gcc_assert (!TARGET_AVX);
-	  return "movlps\t{%1, %0|%0, %1}";
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     case TYPE_SSECVT:
       if (SSE_REG_P (operands[0]))
@@ -2164,10 +2140,7 @@ (define_insn "*movdi_internal"
      (cond [(eq_attr "alternative" "2")
 	      (const_string "SI")
 	    (eq_attr "alternative" "12,13")
-	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
-			  (match_operand 1 "ext_sse_reg_operand"))
-		       (const_string "TI")
-		     (match_test "TARGET_AVX")
+	      (cond [(match_test "TARGET_AVX")
 		       (const_string "TI")
 		     (ior (not (match_test "TARGET_SSE2"))
 			  (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4a.c b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
new file mode 100644
index 00000000000..cb9b071e873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+extern long long i;
+
+long long
+foo1 (void)
+{
+  register long long xmm16 __asm ("xmm16") = i;
+  asm volatile ("" : "+v" (xmm16));
+  register long long xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  return xmm17;
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4b.c b/gcc/testsuite/gcc.target/i386/pr89229-4b.c
new file mode 100644
index 00000000000..023e81253a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-4a.c"
+
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4c.c b/gcc/testsuite/gcc.target/i386/pr89229-4c.c
new file mode 100644
index 00000000000..e02eb37c16d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-4a.c"
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/6] i386: Properly encode vector registers in vector move
  2020-02-29 14:16 V2 [PATCH 0/6] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move H.J. Lu
@ 2020-02-29 14:16 ` H.J. Lu
  2020-03-05 23:47   ` Jeff Law
  2020-02-29 14:16 ` [PATCH 4/6] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV H.J. Lu
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: H.J. Lu @ 2020-02-29 14:16 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Jeffrey Law, Jan Hubicka, Uros Bizjak

On x86, when AVX and AVX512 are enabled, vector move instructions can
be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):

   0:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
   4:	62 f1 fd 08 6f d1    	vmovdqa64 %xmm1,%xmm2

We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
and 256-bit vector moves.  xmm16-xmm31 and ymm16-ymm31 are disallowed in
128-bit and 256-bit modes when AVX512VL is disabled.  Mode attributes on
x86 vector move patterns indicate target preferences of vector move
encoding.  For a scalar register-to-register move, we can use a 512-bit
vector move instruction to move a 32-bit/64-bit scalar if AVX512VL isn't
available.  With AVX512F and AVX512VL, we should use VEX encoding for
128-bit/256-bit vector moves if the upper 16 vector registers aren't used.
This patch adds a function, ix86_output_ssemov, to generate vector moves:

1. If zmm registers are used, use EVEX encoding.
2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
will be generated.
3. If xmm16-xmm31/ymm16-ymm31 registers are used:
   a. With AVX512VL, AVX512VL vector moves will be generated.
   b. Without AVX512VL, an xmm16-xmm31/ymm16-ymm31 register-to-register
      move will be done with a zmm register move.

There is no need to set mode attribute to XImode explicitly since
ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers
with and without AVX512VL.

Tested on AVX2 and AVX512 with and without --with-arch=native.

gcc/

	PR target/89229
	PR target/89346
	* config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
	* config/i386/i386.c (ix86_get_ssemov): New function.
	(ix86_output_ssemov): Likewise.
	* config/i386/sse.md (VMOVE:mov<mode>_internal): Call
	ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
	check.
	(*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV.
	(*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV.
	Remove ext_sse_reg_operand and TARGET_AVX512VL check.
	(*movti_internal): Likewise.
	(*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV.

gcc/testsuite/

	PR target/89229
	PR target/89346
	* gcc.target/i386/avx512vl-vmovdqa64-1.c: Update.
	* gcc.target/i386/pr89346.c: New test.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-2a.c: New test.
	* gcc.target/i386/pr89229-2b.c: Likewise.
	* gcc.target/i386/pr89229-2c.c: Likewise.
	* gcc.target/i386/pr89229-3a.c: Likewise.
	* gcc.target/i386/pr89229-3b.c: Likewise.
	* gcc.target/i386/pr89229-3c.c: Likewise.
---
 gcc/config/i386/i386-protos.h                 |   2 +
 gcc/config/i386/i386.c                        | 208 ++++++++++++++++++
 gcc/config/i386/i386.md                       |  86 +-------
 gcc/config/i386/sse.md                        |  98 +--------
 .../gcc.target/i386/avx512vl-vmovdqa64-1.c    |   7 +-
 gcc/testsuite/gcc.target/i386/pr89229-2a.c    |  15 ++
 gcc/testsuite/gcc.target/i386/pr89229-2b.c    |  13 ++
 gcc/testsuite/gcc.target/i386/pr89229-2c.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89229-3a.c    |  16 ++
 gcc/testsuite/gcc.target/i386/pr89229-3b.c    |  12 +
 gcc/testsuite/gcc.target/i386/pr89229-3c.c    |   6 +
 gcc/testsuite/gcc.target/i386/pr89346.c       |  15 ++
 12 files changed, 303 insertions(+), 181 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89346.c

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 266381ca5a6..39fcaa0ad5f 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -38,6 +38,8 @@ extern void ix86_expand_split_stack_prologue (void);
 extern void ix86_output_addr_vec_elt (FILE *, int);
 extern void ix86_output_addr_diff_elt (FILE *, int, int);
 
+extern const char *ix86_output_ssemov (rtx_insn *, rtx *);
+
 extern enum calling_abi ix86_cfun_abi (void);
 extern enum calling_abi ix86_function_type_abi (const_tree);
 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dac7a3fc5fd..7bbfbb4c5a7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4915,6 +4915,214 @@ ix86_pre_reload_split (void)
 	  && !(cfun->curr_properties & PROP_rtl_split_insns));
 }
 
+/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
+   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
+   TARGET_AVX512VL or it is a register to register move which can
+   be done with zmm register move. */
+
+static const char *
+ix86_get_ssemov (rtx *operands, unsigned size,
+		 enum attr_mode insn_mode, machine_mode mode)
+{
+  char buf[128];
+  bool misaligned_p = (misaligned_operand (operands[0], mode)
+		       || misaligned_operand (operands[1], mode));
+  bool evex_reg_p = (size == 64
+		     || EXT_REX_SSE_REG_P (operands[0])
+		     || EXT_REX_SSE_REG_P (operands[1]));
+  machine_mode scalar_mode;
+
+  const char *opcode = NULL;
+  enum
+    {
+      opcode_int,
+      opcode_float,
+      opcode_double
+    } type = opcode_int;
+
+  switch (insn_mode)
+    {
+    case MODE_V16SF:
+    case MODE_V8SF:
+    case MODE_V4SF:
+      scalar_mode = E_SFmode;
+      type = opcode_float;
+      break;
+    case MODE_V8DF:
+    case MODE_V4DF:
+    case MODE_V2DF:
+      scalar_mode = E_DFmode;
+      type = opcode_double;
+      break;
+    case MODE_XI:
+    case MODE_OI:
+    case MODE_TI:
+      scalar_mode = GET_MODE_INNER (mode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
+     we can only use zmm register move without memory operand.  */
+  if (evex_reg_p
+      && !TARGET_AVX512VL
+      && GET_MODE_SIZE (mode) < 64)
+    {
+      /* NB: Since ix86_hard_regno_mode_ok only allows xmm16-xmm31 or
+	 ymm16-ymm31 in 128/256 bit modes when AVX512VL is enabled,
+	 we get here only for xmm16-xmm31 or ymm16-ymm31 in 32/64 bit
+	 modes.  */
+      if (GET_MODE_SIZE (mode) >= 16
+	  || memory_operand (operands[0], mode)
+	  || memory_operand (operands[1], mode))
+	gcc_unreachable ();
+      size = 64;
+      switch (type)
+	{
+	case opcode_int:
+	  opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+	  break;
+	case opcode_float:
+	  opcode = misaligned_p ? "vmovups" : "vmovaps";
+	  break;
+	case opcode_double:
+	  opcode = misaligned_p ? "vmovupd" : "vmovapd";
+	  break;
+	}
+    }
+  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
+    {
+      switch (scalar_mode)
+	{
+	case E_SFmode:
+	  opcode = misaligned_p ? "%vmovups" : "%vmovaps";
+	  break;
+	case E_DFmode:
+	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
+	  break;
+	case E_TFmode:
+	  if (evex_reg_p)
+	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+	  else
+	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+    }
+  else if (SCALAR_INT_MODE_P (scalar_mode))
+    {
+      switch (scalar_mode)
+	{
+	case E_QImode:
+	  if (evex_reg_p)
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu8"
+			 : "vmovdqu64")
+		      : "vmovdqa64");
+	  else
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu8"
+			 : "%vmovdqu")
+		      : "%vmovdqa");
+	  break;
+	case E_HImode:
+	  if (evex_reg_p)
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu16"
+			 : "vmovdqu64")
+		      : "vmovdqa64");
+	  else
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu16"
+			 : "%vmovdqu")
+		      : "%vmovdqa");
+	  break;
+	case E_SImode:
+	  if (evex_reg_p)
+	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+	  else
+	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+	  break;
+	case E_DImode:
+	case E_TImode:
+	case E_OImode:
+	  if (evex_reg_p)
+	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+	  else
+	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+	  break;
+	case E_XImode:
+	  opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+    }
+  else
+    gcc_unreachable ();
+
+  switch (size)
+    {
+    case 64:
+      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
+		opcode);
+      break;
+    case 32:
+      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
+		opcode);
+      break;
+    case 16:
+      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
+		opcode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  output_asm_insn (buf, operands);
+  return "";
+}
+
+/* Return the template of the TYPE_SSEMOV instruction to move
+   operands[1] into operands[0].  */
+
+const char *
+ix86_output_ssemov (rtx_insn *insn, rtx *operands)
+{
+  machine_mode mode = GET_MODE (operands[0]);
+  if (get_attr_type (insn) != TYPE_SSEMOV
+      || mode != GET_MODE (operands[1]))
+    gcc_unreachable ();
+
+  enum attr_mode insn_mode = get_attr_mode (insn);
+
+  switch (insn_mode)
+    {
+    case MODE_XI:
+    case MODE_V8DF:
+    case MODE_V16SF:
+      return ix86_get_ssemov (operands, 64, insn_mode, mode);
+
+    case MODE_OI:
+    case MODE_V4DF:
+    case MODE_V8SF:
+      return ix86_get_ssemov (operands, 32, insn_mode, mode);
+
+    case MODE_TI:
+    case MODE_V2DF:
+    case MODE_V4SF:
+      return ix86_get_ssemov (operands, 16, insn_mode, mode);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Returns true if OP contains a symbol reference */
 
 bool
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6c57500ae8e..cea831b6086 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1902,11 +1902,7 @@ (define_insn "*movxi_internal_avx512f"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      if (misaligned_operand (operands[0], XImode)
-	  || misaligned_operand (operands[1], XImode))
-	return "vmovdqu32\t{%1, %0|%0, %1}";
-      else
-	return "vmovdqa32\t{%1, %0|%0, %1}";
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -1929,21 +1925,7 @@ (define_insn "*movoi_internal_avx"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      if (misaligned_operand (operands[0], OImode)
-	  || misaligned_operand (operands[1], OImode))
-	{
-	  if (get_attr_mode (insn) == MODE_XI)
-	    return "vmovdqu32\t{%1, %0|%0, %1}";
-	  else
-	    return "vmovdqu\t{%1, %0|%0, %1}";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_XI)
-	    return "vmovdqa32\t{%1, %0|%0, %1}";
-	  else
-	    return "vmovdqa\t{%1, %0|%0, %1}";
-	}
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -1952,15 +1934,7 @@ (define_insn "*movoi_internal_avx"
   [(set_attr "isa" "*,avx2,*,*")
    (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
    (set_attr "prefix" "vex")
-   (set (attr "mode")
-	(cond [(ior (match_operand 0 "ext_sse_reg_operand")
-		    (match_operand 1 "ext_sse_reg_operand"))
-		 (const_string "XI")
-	       (and (eq_attr "alternative" "1")
-		    (match_test "TARGET_AVX512VL"))
-		 (const_string "XI")
-	      ]
-	      (const_string "OI")))])
+   (set_attr "mode" "OI")])
 
 (define_insn "*movti_internal"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
@@ -1981,27 +1955,7 @@ (define_insn "*movti_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      /* TDmode values are passed as TImode on the stack.  Moving them
-	 to stack may result in unaligned memory access.  */
-      if (misaligned_operand (operands[0], TImode)
-	  || misaligned_operand (operands[1], TImode))
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovups\t{%1, %0|%0, %1}";
-	  else if (get_attr_mode (insn) == MODE_XI)
-	    return "vmovdqu32\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqu\t{%1, %0|%0, %1}";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	  else if (get_attr_mode (insn) == MODE_XI)
-	    return "vmovdqa32\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqa\t{%1, %0|%0, %1}";
-	}
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -2028,12 +1982,6 @@ (define_insn "*movti_internal"
    (set (attr "mode")
 	(cond [(eq_attr "alternative" "0,1")
 		 (const_string "DI")
-	       (ior (match_operand 0 "ext_sse_reg_operand")
-		    (match_operand 1 "ext_sse_reg_operand"))
-		 (const_string "XI")
-	       (and (eq_attr "alternative" "3")
-		    (match_test "TARGET_AVX512VL"))
-		 (const_string "XI")
 	       (match_test "TARGET_AVX")
 		 (const_string "TI")
 	       (ior (not (match_test "TARGET_SSE2"))
@@ -3254,31 +3202,7 @@ (define_insn "*movtf_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      /* Handle misaligned load/store since we
-         don't have movmisaligntf pattern. */
-      if (misaligned_operand (operands[0], TFmode)
-	  || misaligned_operand (operands[1], TFmode))
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovups\t{%1, %0|%0, %1}";
-	  else if (TARGET_AVX512VL
-		   && (EXT_REX_SSE_REG_P (operands[0])
-		       || EXT_REX_SSE_REG_P (operands[1])))
-	    return "vmovdqu64\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqu\t{%1, %0|%0, %1}";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	  else if (TARGET_AVX512VL
-		   && (EXT_REX_SSE_REG_P (operands[0])
-		       || EXT_REX_SSE_REG_P (operands[1])))
-	    return "vmovdqa64\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqa\t{%1, %0|%0, %1}";
-	}
+      return ix86_output_ssemov (insn, operands);
 
     case TYPE_MULTI:
 	return "#";
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ee1f138d1af..8f5902292c6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1013,98 +1013,7 @@ (define_insn "mov<mode>_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
-	 in avx512f, so we need to use workarounds, to access sse registers
-	 16-31, which are evex-only. In avx512vl we don't need workarounds.  */
-      if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
-	  && (EXT_REX_SSE_REG_P (operands[0])
-	      || EXT_REX_SSE_REG_P (operands[1])))
-	{
-	  if (memory_operand (operands[0], <MODE>mode))
-	    {
-	      if (<MODE_SIZE> == 32)
-		return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
-	      else if (<MODE_SIZE> == 16)
-		return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
-	      else
-		gcc_unreachable ();
-	    }
-	  else if (memory_operand (operands[1], <MODE>mode))
-	    {
-	      if (<MODE_SIZE> == 32)
-		return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
-	      else if (<MODE_SIZE> == 16)
-		return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
-	      else
-		gcc_unreachable ();
-	    }
-	  else
-	    /* Reg -> reg move is always aligned.  Just use wider move.  */
-	    switch (get_attr_mode (insn))
-	      {
-	      case MODE_V8SF:
-	      case MODE_V4SF:
-		return "vmovaps\t{%g1, %g0|%g0, %g1}";
-	      case MODE_V4DF:
-	      case MODE_V2DF:
-		return "vmovapd\t{%g1, %g0|%g0, %g1}";
-	      case MODE_OI:
-	      case MODE_TI:
-		return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
-	      default:
-		gcc_unreachable ();
-	      }
-	}
-
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V16SF:
-	case MODE_V8SF:
-	case MODE_V4SF:
-	  if (misaligned_operand (operands[0], <MODE>mode)
-	      || misaligned_operand (operands[1], <MODE>mode))
-	    return "%vmovups\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-
-	case MODE_V8DF:
-	case MODE_V4DF:
-	case MODE_V2DF:
-	  if (misaligned_operand (operands[0], <MODE>mode)
-	      || misaligned_operand (operands[1], <MODE>mode))
-	    return "%vmovupd\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovapd\t{%1, %0|%0, %1}";
-
-	case MODE_OI:
-	case MODE_TI:
-	  if (misaligned_operand (operands[0], <MODE>mode)
-	      || misaligned_operand (operands[1], <MODE>mode))
-	    return TARGET_AVX512VL
-		   && (<MODE>mode == V4SImode
-		       || <MODE>mode == V2DImode
-		       || <MODE>mode == V8SImode
-		       || <MODE>mode == V4DImode
-		       || TARGET_AVX512BW)
-		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-		   : "%vmovdqu\t{%1, %0|%0, %1}";
-	  else
-	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
-				   : "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_XI:
-	  if (misaligned_operand (operands[0], <MODE>mode)
-	      || misaligned_operand (operands[1], <MODE>mode))
-	    return (<MODE>mode == V16SImode
-		    || <MODE>mode == V8DImode
-		    || TARGET_AVX512BW)
-		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-		   : "vmovdqu64\t{%1, %0|%0, %1}";
-	  else
-	    return "vmovdqa64\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -1113,10 +1022,7 @@ (define_insn "mov<mode>_internal"
   [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
-	(cond [(and (eq_attr "alternative" "1")
-		    (match_test "TARGET_AVX512VL"))
-		 (const_string "<sseinsnmode>")
-	       (match_test "TARGET_AVX")
+	(cond [(match_test "TARGET_AVX")
 		 (const_string "<sseinsnmode>")
 	       (ior (not (match_test "TARGET_SSE2"))
 		    (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
index 14fe4b84544..db4d9d14875 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
@@ -4,14 +4,13 @@
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\nxy\]*\\((?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2a.c b/gcc/testsuite/gcc.target/i386/pr89229-2a.c
new file mode 100644
index 00000000000..0cf78039481
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+typedef __int128 __m128t __attribute__ ((__vector_size__ (16),
+					 __may_alias__));
+
+__m128t
+foo1 (void)
+{
+  register __int128 xmm16 __asm ("xmm16") = (__int128) -1;
+  asm volatile ("" : "+v" (xmm16));
+  return (__m128t) xmm16;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2b.c b/gcc/testsuite/gcc.target/i386/pr89229-2b.c
new file mode 100644
index 00000000000..8d5d6c41d30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2b.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+typedef __int128 __m128t __attribute__ ((__vector_size__ (16),
+					 __may_alias__));
+
+__m128t
+foo1 (void)
+{
+  register __int128 xmm16 __asm ("xmm16") = (__int128) -1; /* { dg-error "register specified for 'xmm16'" } */
+  asm volatile ("" : "+v" (xmm16));
+  return (__m128t) xmm16;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2c.c b/gcc/testsuite/gcc.target/i386/pr89229-2c.c
new file mode 100644
index 00000000000..218da46dcd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-2a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3a.c b/gcc/testsuite/gcc.target/i386/pr89229-3a.c
new file mode 100644
index 00000000000..fcb85c366b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern __float128 d;
+
+void
+foo1 (__float128 x)
+{
+  register __float128 xmm16 __asm ("xmm16") = x;
+  asm volatile ("" : "+v" (xmm16));
+  register __float128 xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3b.c b/gcc/testsuite/gcc.target/i386/pr89229-3b.c
new file mode 100644
index 00000000000..37eb83c783b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3b.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+extern __float128 d;
+
+void
+foo1 (__float128 x)
+{
+  register __float128 xmm16 __asm ("xmm16") = x; /* { dg-error "register specified for 'xmm16'" } */
+  asm volatile ("" : "+v" (xmm16));
+  d = xmm16;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3c.c b/gcc/testsuite/gcc.target/i386/pr89229-3c.c
new file mode 100644
index 00000000000..529a520133c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-3a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89346.c b/gcc/testsuite/gcc.target/i386/pr89346.c
new file mode 100644
index 00000000000..cdc9accf521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89346.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m256i y;
+
+void
+foo (void)
+{
+   _mm256_store_epi64 (p, y);
+}
+
+/* { dg-final { scan-assembler-not "vmovdqa64" } } */
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 5/6] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV
  2020-02-29 14:16 V2 [PATCH 0/6] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move H.J. Lu
                   ` (2 preceding siblings ...)
  2020-02-29 14:16 ` [PATCH 2/6] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV H.J. Lu
@ 2020-02-29 14:16 ` H.J. Lu
  2020-03-12  3:46   ` Jeff Law
  2020-02-29 14:16 ` [PATCH 6/6] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV H.J. Lu
  2020-02-29 15:30 ` [PATCH 3/6] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV H.J. Lu
  5 siblings, 1 reply; 16+ messages in thread
From: H.J. Lu @ 2020-02-29 14:16 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Jeffrey Law, Jan Hubicka, Uros Bizjak

There is no need to set mode attribute to V16SFmode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

	PR target/89229
	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_SF.
	* config/i386/i386.md (*movsf_internal): Call ix86_output_ssemov
	for TYPE_SSEMOV.  Remove TARGET_PREFER_AVX256, TARGET_AVX512VL
	and ext_sse_reg_operand check.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-7a.c: New test.
	* gcc.target/i386/pr89229-7b.c: Likewise.
	* gcc.target/i386/pr89229-7c.c: Likewise.
---
 gcc/config/i386/i386.c                     |  6 +++++
 gcc/config/i386/i386.md                    | 26 ++--------------------
 gcc/testsuite/gcc.target/i386/pr89229-7a.c | 16 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr89229-7b.c |  6 +++++
 gcc/testsuite/gcc.target/i386/pr89229-7c.c |  6 +++++
 5 files changed, 36 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7c.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a6fe9894ab8..1d3b784532b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5136,6 +5136,12 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
       else
 	return "%vmovsd\t{%1, %0|%0, %1}";
 
+    case MODE_SF:
+      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
+	return "vmovss\t{%d1, %0|%0, %d1}";
+      else
+	return "%vmovss\t{%1, %0|%0, %1}";
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 060a34c4bd4..b837c345f4e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3469,24 +3469,7 @@ (define_insn "*movsf_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_SF:
-	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-	    return "vmovss\t{%d1, %0|%0, %d1}";
-	  return "%vmovss\t{%1, %0|%0, %1}";
-
-	case MODE_V16SF:
-	  return "vmovaps\t{%g1, %g0|%g0, %g1}";
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-
-	case MODE_SI:
-	  return "%vmovd\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     case TYPE_MMXMOV:
       switch (get_attr_mode (insn))
@@ -3558,12 +3541,7 @@ (define_insn "*movsf_internal"
 		  better to maintain the whole registers in single format
 		  to avoid problems on using packed logical operations.  */
 	       (eq_attr "alternative" "6")
-		 (cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
-				  (not (match_test "TARGET_AVX512VL")))
-			     (ior (match_operand 0 "ext_sse_reg_operand")
-				  (match_operand 1 "ext_sse_reg_operand")))
-			  (const_string "V16SF")
-			(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+		 (cond [(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
 			     (match_test "TARGET_SSE_SPLIT_REGS"))
 			  (const_string "V4SF")
 		       ]
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7a.c b/gcc/testsuite/gcc.target/i386/pr89229-7a.c
new file mode 100644
index 00000000000..856115b2f5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern float d;
+
+void
+foo1 (float x)
+{
+  register float xmm16 __asm ("xmm16") = x;
+  asm volatile ("" : "+v" (xmm16));
+  register float xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7b.c b/gcc/testsuite/gcc.target/i386/pr89229-7b.c
new file mode 100644
index 00000000000..93d1e43770c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-7a.c"
+
+/* { dg-final { scan-assembler-times "vmovaps\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7c.c b/gcc/testsuite/gcc.target/i386/pr89229-7c.c
new file mode 100644
index 00000000000..e37ff2bf5bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-7a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 3/6] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV
  2020-02-29 14:16 V2 [PATCH 0/6] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move H.J. Lu
                   ` (4 preceding siblings ...)
  2020-02-29 14:16 ` [PATCH 6/6] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV H.J. Lu
@ 2020-02-29 15:30 ` H.J. Lu
  2020-03-12  3:39   ` Jeff Law
  5 siblings, 1 reply; 16+ messages in thread
From: H.J. Lu @ 2020-02-29 15:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek, Jeffrey Law, Jan Hubicka, Uros Bizjak

There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

	PR target/89229
	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_SI.
	* config/i386/i386.md (*movsi_internal): Call ix86_output_ssemov
	for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
	check.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-5a.c: New test.
	* gcc.target/i386/pr89229-5b.c: Likewise.
	* gcc.target/i386/pr89229-5c.c: Likewise.
---
 gcc/config/i386/i386.c                     |  3 +++
 gcc/config/i386/i386.md                    | 25 ++--------------------
 gcc/testsuite/gcc.target/i386/pr89229-5a.c | 17 +++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89229-5b.c |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr89229-5c.c |  7 ++++++
 5 files changed, 35 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5c.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index baf70a64193..c28c162282a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5127,6 +5127,9 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
       else
 	return "%vmovq\t{%1, %0|%0, %1}";
 
+    case MODE_SI:
+      return "%vmovd\t{%1, %0|%0, %1}";
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d8462b3de37..e9537fadfe8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2261,25 +2261,7 @@ (define_insn "*movsi_internal"
       gcc_unreachable ();
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_SI:
-          return "%vmovd\t{%1, %0|%0, %1}";
-	case MODE_TI:
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_XI:
-	  return "vmovdqa32\t{%g1, %g0|%g0, %g1}";
-
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-
-	case MODE_SF:
-	  gcc_assert (!TARGET_AVX);
-          return "movss\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     case TYPE_MMX:
       return "pxor\t%0, %0";
@@ -2345,10 +2327,7 @@ (define_insn "*movsi_internal"
      (cond [(eq_attr "alternative" "2,3")
 	      (const_string "DI")
 	    (eq_attr "alternative" "8,9")
-	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
-			  (match_operand 1 "ext_sse_reg_operand"))
-		       (const_string "XI")
-		     (match_test "TARGET_AVX")
+	      (cond [(match_test "TARGET_AVX")
 		       (const_string "TI")
 		     (ior (not (match_test "TARGET_SSE2"))
 			  (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5a.c b/gcc/testsuite/gcc.target/i386/pr89229-5a.c
new file mode 100644
index 00000000000..fd56f447016
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern int i;
+
+int
+foo1 (void)
+{
+  register int xmm16 __asm ("xmm16") = i;
+  asm volatile ("" : "+v" (xmm16));
+  register int xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  return xmm17;
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5b.c b/gcc/testsuite/gcc.target/i386/pr89229-5b.c
new file mode 100644
index 00000000000..261f2e12e8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-5a.c"
+
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5c.c b/gcc/testsuite/gcc.target/i386/pr89229-5c.c
new file mode 100644
index 00000000000..16fad809385
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-5a.c"
+
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/6] i386: Properly encode vector registers in vector move
  2020-02-29 14:16 ` [PATCH 1/6] i386: Properly encode vector registers in " H.J. Lu
@ 2020-03-05 23:47   ` Jeff Law
  2020-03-08 12:04     ` [COMMITTED, PATCH] gcc.target/i386/pr89229-3c.c: Include "pr89229-3a.c" H.J. Lu
  2020-03-10 12:35     ` [PATCH 1/6] i386: Properly encode vector registers in vector move H.J. Lu
  0 siblings, 2 replies; 16+ messages in thread
From: Jeff Law @ 2020-03-05 23:47 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches; +Cc: Jakub Jelinek, Jan Hubicka, Uros Bizjak

On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> On x86, when AVX and AVX512 are enabled, vector move instructions can
> be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):
> 
>    0:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
>    4:	62 f1 fd 08 6f d1    	vmovdqa64 %xmm1,%xmm2
> 
> We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
> only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
> and 256-bit vector moves.  xmm16-xmm31 and ymm16-ymm31 are disallowed in
> 128-bit and 256-bit modes when AVX512VL is disabled.  Mode attributes on
> x86 vector move patterns indicate target preferences of vector move
> encoding.  For scalar register to register move, we can use 512-bit
> vector move instructions to move 32-bit/64-bit scalar if AVX512VL isn't
> available.  With AVX512F and AVX512VL, we should use VEX encoding for
> 128-bit/256-bit vector moves if upper 16 vector registers aren't used.
> This patch adds a function, ix86_output_ssemov, to generate vector moves:
> 
> 1. If zmm registers are used, use EVEX encoding.
> 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
> will be generated.
> 3. If xmm16-xmm31/ymm16-ymm31 registers are used:
>    a. With AVX512VL, AVX512VL vector moves will be generated.
>    b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
>       move will be done with zmm register move.
> 
> There is no need to set mode attribute to XImode explicitly since
> ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers
> with and without AVX512VL.
> 
> Tested on AVX2 and AVX512 with and without --with-arch=native.
> 
> gcc/
> 
> 	PR target/89229
> 	PR target/89346
> 	* config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
> 	* config/i386/i386.c (ix86_get_ssemov): New function.
> 	(ix86_output_ssemov): Likewise.
> 	* config/i386/sse.md (VMOVE:mov<mode>_internal): Call
> 	ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
> 	check.
> 	(*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV.
> 	(*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV.
> 	Remove ext_sse_reg_operand and TARGET_AVX512VL check.
> 	(*movti_internal): Likewise.
> 	(*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV.
> 
> gcc/testsuite/
> 
> 	PR target/89229
> 	PR target/89346
> 	* gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
> 	* gcc.target/i386/pr89346.c: New test.
> 
> gcc/testsuite/
> 
> 	PR target/89229
> 	* gcc.target/i386/pr89229-2a.c: New test.
> 	* gcc.target/i386/pr89229-2b.c: Likewise.
> 	* gcc.target/i386/pr89229-2c.c: Likewise.
> 	* gcc.target/i386/pr89229-3a.c: Likewise.
> 	* gcc.target/i386/pr89229-3b.c: Likewise.
> 	* gcc.target/i386/pr89229-3c.c: Likewise.
OK.  Let's get this one installed, let the various testers out there chew on it
for a day, then we'll iterate through the rest.

Thanks again for your patience.

jeff
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [COMMITTED, PATCH] gcc.target/i386/pr89229-3c.c: Include "pr89229-3a.c"
  2020-03-05 23:47   ` Jeff Law
@ 2020-03-08 12:04     ` H.J. Lu
  2020-03-10 12:35     ` [PATCH 1/6] i386: Properly encode vector registers in vector move H.J. Lu
  1 sibling, 0 replies; 16+ messages in thread
From: H.J. Lu @ 2020-03-08 12:04 UTC (permalink / raw)
  To: Jeffrey Law; +Cc: GCC Patches, Jakub Jelinek, Jan Hubicka, Uros Bizjak

On Thu, Mar 5, 2020 at 3:47 PM Jeff Law <law@redhat.com> wrote:
>
> On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> > On x86, when AVX and AVX512 are enabled, vector move instructions can
> > be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):
> >
> >    0: c5 f9 6f d1             vmovdqa %xmm1,%xmm2
> >    4: 62 f1 fd 08 6f d1       vmovdqa64 %xmm1,%xmm2
> >
> > We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
> > only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
> > and 256-bit vector moves.  xmm16-xmm31 and ymm16-ymm31 are disallowed in
> > 128-bit and 256-bit modes when AVX512VL is disabled.  Mode attributes on
> > x86 vector move patterns indicate target preferences of vector move
> > encoding.  For scalar register to register move, we can use 512-bit
> > vector move instructions to move 32-bit/64-bit scalar if AVX512VL isn't
> > available.  With AVX512F and AVX512VL, we should use VEX encoding for
> > 128-bit/256-bit vector moves if upper 16 vector registers aren't used.
> > This patch adds a function, ix86_output_ssemov, to generate vector moves:
> >
> > 1. If zmm registers are used, use EVEX encoding.
> > 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
> > will be generated.
> > 3. If xmm16-xmm31/ymm16-ymm31 registers are used:
> >    a. With AVX512VL, AVX512VL vector moves will be generated.
> >    b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
> >       move will be done with zmm register move.
> >
> > There is no need to set mode attribute to XImode explicitly since
> > ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers
> > with and without AVX512VL.
> >
> > Tested on AVX2 and AVX512 with and without --with-arch=native.
> >
> > gcc/
> >
> >       PR target/89229
> >       PR target/89346
> >       * config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
> >       * config/i386/i386.c (ix86_get_ssemov): New function.
> >       (ix86_output_ssemov): Likewise.
> >       * config/i386/sse.md (VMOVE:mov<mode>_internal): Call
> >       ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
> >       check.
> >       (*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV.
> >       (*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV.
> >       Remove ext_sse_reg_operand and TARGET_AVX512VL check.
> >       (*movti_internal): Likewise.
> >       (*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV.
> >
> > gcc/testsuite/
> >
> >       PR target/89229
> >       PR target/89346
> >       * gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
> >       * gcc.target/i386/pr89346.c: New test.
> >
> > gcc/testsuite/
> >
> >       PR target/89229
> >       * gcc.target/i386/pr89229-2a.c: New test.
> >       * gcc.target/i386/pr89229-2b.c: Likewise.
> >       * gcc.target/i386/pr89229-2c.c: Likewise.
> >       * gcc.target/i386/pr89229-3a.c: Likewise.
> >       * gcc.target/i386/pr89229-3b.c: Likewise.
> >       * gcc.target/i386/pr89229-3c.c: Likewise.
> OK.  Let's get this one installed, let the various testers out there chew on it
> for a day, then we'll iterate through the rest.
>
> Thanks again for your patience.
>

I checked in this patch to fix

FAIL: gcc.target/i386/pr89229-3c.c (test for excess errors)

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/6] i386: Properly encode vector registers in vector move
  2020-03-05 23:47   ` Jeff Law
  2020-03-08 12:04     ` [COMMITTED, PATCH] gcc.target/i386/pr89229-3c.c: Include "pr89229-3a.c" H.J. Lu
@ 2020-03-10 12:35     ` H.J. Lu
  1 sibling, 0 replies; 16+ messages in thread
From: H.J. Lu @ 2020-03-10 12:35 UTC (permalink / raw)
  To: Jeffrey Law; +Cc: GCC Patches, Jakub Jelinek, Jan Hubicka, Uros Bizjak

On Thu, Mar 5, 2020 at 3:47 PM Jeff Law <law@redhat.com> wrote:
>
> On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> > On x86, when AVX and AVX512 are enabled, vector move instructions can
> > be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):
> >
> >    0: c5 f9 6f d1             vmovdqa %xmm1,%xmm2
> >    4: 62 f1 fd 08 6f d1       vmovdqa64 %xmm1,%xmm2
> >
> > We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
> > only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
> > and 256-bit vector moves.  xmm16-xmm31 and ymm16-ymm31 are disallowed in
> > 128-bit and 256-bit modes when AVX512VL is disabled.  Mode attributes on
> > x86 vector move patterns indicate target preferences of vector move
> > encoding.  For scalar register to register move, we can use 512-bit
> > vector move instructions to move 32-bit/64-bit scalar if AVX512VL isn't
> > available.  With AVX512F and AVX512VL, we should use VEX encoding for
> > 128-bit/256-bit vector moves if upper 16 vector registers aren't used.
> > This patch adds a function, ix86_output_ssemov, to generate vector moves:
> >
> > 1. If zmm registers are used, use EVEX encoding.
> > 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
> > will be generated.
> > 3. If xmm16-xmm31/ymm16-ymm31 registers are used:
> >    a. With AVX512VL, AVX512VL vector moves will be generated.
> >    b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
> >       move will be done with zmm register move.
> >
> > There is no need to set mode attribute to XImode explicitly since
> > ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers
> > with and without AVX512VL.
> >
> > Tested on AVX2 and AVX512 with and without --with-arch=native.
> >
> > gcc/
> >
> >       PR target/89229
> >       PR target/89346
> >       * config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
> >       * config/i386/i386.c (ix86_get_ssemov): New function.
> >       (ix86_output_ssemov): Likewise.
> >       * config/i386/sse.md (VMOVE:mov<mode>_internal): Call
> >       ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
> >       check.
> >       (*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV.
> >       (*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV.
> >       Remove ext_sse_reg_operand and TARGET_AVX512VL check.
> >       (*movti_internal): Likewise.
> >       (*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV.
> >
> > gcc/testsuite/
> >
> >       PR target/89229
> >       PR target/89346
> >       * gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
> >       * gcc.target/i386/pr89346.c: New test.
> >
> > gcc/testsuite/
> >
> >       PR target/89229
> >       * gcc.target/i386/pr89229-2a.c: New test.
> >       * gcc.target/i386/pr89229-2b.c: Likewise.
> >       * gcc.target/i386/pr89229-2c.c: Likewise.
> >       * gcc.target/i386/pr89229-3a.c: Likewise.
> >       * gcc.target/i386/pr89229-3b.c: Likewise.
> >       * gcc.target/i386/pr89229-3c.c: Likewise.
> OK.  Let's get this one installed, let the various testers out there chew on it
> for a day, then we'll iterate through the rest.
>
> Thanks again for your patience.

Hi, Jeff,

My first patch has been installed for 5 days without problems.  Can you
review the rest?

Thanks.


-- 
H.J.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/6] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV
  2020-02-29 14:16 ` [PATCH 2/6] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV H.J. Lu
@ 2020-03-12  3:32   ` Jeff Law
  0 siblings, 0 replies; 16+ messages in thread
From: Jeff Law @ 2020-03-12  3:32 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches; +Cc: Jakub Jelinek, Jan Hubicka, Uros Bizjak

On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> There is no need to set mode attribute to XImode since ix86_output_ssemov
> can properly encode xmm16-xmm31 registers with and without AVX512VL.
> 
> gcc/
> 
> 	PR target/89229
> 	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_DI.
> 	* config/i386/i386.md (*movdi_internal): Call ix86_output_ssemov
> 	for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
> 	check.
> 
> gcc/testsuite/
> 
> 	PR target/89229
> 	* gcc.target/i386/pr89229-4a.c: New test.
> 	* gcc.target/i386/pr89229-4b.c: Likewise.
> 	* gcc.target/i386/pr89229-4c.c: Likewise.
So for alternatives 14, 15, 16 and !TARGET_SSE2 can't the insn_mode be V2SF? 
Isn't that going to trigger the gcc_unreachable in ix86_output_ssemov?



Jeff


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/6] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV
  2020-02-29 15:30 ` [PATCH 3/6] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV H.J. Lu
@ 2020-03-12  3:39   ` Jeff Law
  0 siblings, 0 replies; 16+ messages in thread
From: Jeff Law @ 2020-03-12  3:39 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches; +Cc: Jakub Jelinek, Jan Hubicka, Uros Bizjak

On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> There is no need to set mode attribute to XImode since ix86_output_ssemov
> can properly encode xmm16-xmm31 registers with and without AVX512VL.
> 
> gcc/
> 
> 	PR target/89229
> 	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_SI.
> 	* config/i386/i386.md (*movsi_internal): Call ix86_output_ssemov
> 	for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
> 	check.
> 
> gcc/testsuite/
> 
> 	PR target/89229
> 	* gcc.target/i386/pr89229-5a.c: New test.
> 	* gcc.target/i386/pr89229-5b.c: Likewise.
> 	* gcc.target/i386/pr89229-5c.c: Likewise.
Similar to #2, can't we get insn_mode to be SFmode for alternatives 10,11 and
!TARGET_SSE2?  Won't that cause us to hit the gcc_unreachable in
ix86_output_ssemov?

jeff
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/6] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV
  2020-02-29 14:16 ` [PATCH 4/6] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV H.J. Lu
@ 2020-03-12  3:41   ` Jeff Law
  0 siblings, 0 replies; 16+ messages in thread
From: Jeff Law @ 2020-03-12  3:41 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches; +Cc: Jakub Jelinek, Jan Hubicka, Uros Bizjak

On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> There is no need to set mode attribute to XImode nor V8DFmode since
> ix86_output_ssemov can properly encode xmm16-xmm31 registers with and
> without AVX512VL.
> 
> gcc/
> 
> 	PR target/89229
> 	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_DF.
> 	* config/i386/i386.md (*movdf_internal): Call ix86_output_ssemov
> 	for TYPE_SSEMOV.  Remove TARGET_AVX512F, TARGET_PREFER_AVX256,
> 	TARGET_AVX512VL and ext_sse_reg_operand check.
And I worry about V1DF for alternatives 14,18 and SSE_SPLIT_REGS as well as
alternative 15,19 and !TARGET_SSE2 which has insn_mode of V2SF.

jeff
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 5/6] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV
  2020-02-29 14:16 ` [PATCH 5/6] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV H.J. Lu
@ 2020-03-12  3:46   ` Jeff Law
  0 siblings, 0 replies; 16+ messages in thread
From: Jeff Law @ 2020-03-12  3:46 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches; +Cc: Jakub Jelinek, Jan Hubicka, Uros Bizjak

On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> There is no need to set mode attribute to V16SFmode since ix86_output_ssemov
> can properly encode xmm16-xmm31 registers with and without AVX512VL.
> 
> gcc/
> 
> 	PR target/89229
> 	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_SF.
> 	* config/i386/i386.md (*movsf_internal): Call ix86_output_ssemov
> 	for TYPE_SSEMOV.  Remove TARGET_PREFER_AVX256, TARGET_AVX512VL
> 	and ext_sse_reg_operand check.
> 
> gcc/testsuite/
> 
> 	PR target/89229
> 	* gcc.target/i386/pr89229-7a.c: New test.
> 	* gcc.target/i386/pr89229-7b.c: Likewise.
> 	* gcc.target/i386/pr89229-7c.c: Likewise.
I believe this has a dependency on patch #3.  It's OK once patch #3 is approved.
Alternately, you could break out the MODE_SI hunk in ix86_output_ssemov from
patch #3, add it to this patch and that would be approved for immediate
integration.

Jeff


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 6/6] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV
  2020-02-29 14:16 ` [PATCH 6/6] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV H.J. Lu
@ 2020-03-12  3:53   ` Jeff Law
  2020-03-12 10:52     ` H.J. Lu
  0 siblings, 1 reply; 16+ messages in thread
From: Jeff Law @ 2020-03-12  3:53 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches; +Cc: Jakub Jelinek, Jan Hubicka, Uros Bizjak

On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> There is no need to set mode attribute to XImode since ix86_output_ssemov
> can properly encode xmm16-xmm31 registers with and without AVX512VL.
> 
> Remove ext_sse_reg_operand since it is no longer needed.
> 
> 	PR target/89229
> 	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_V1DF and
> 	MODE_V2SF.
> 	* config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Call
> 	ix86_output_ssemov for TYPE_SSEMOV.  Remove ext_sse_reg_operand
> 	check.
> 	* config/i386/predicates.md (ext_sse_reg_operand): Removed.
This is OK.   I think once this is in, patch #2 becomes OK because this patch
adds V2SF handling in ix86_output_ssemov.

Similarly I think patch #4 is OK once this one goes in since it adds V1DF as
well.

So perhaps an integration plan would be to immediately install #6, followed 24hrs
later by patch #4, then 24hrs after patch #2.

Then we can work on patch #5 and patch #3 where I think we go with patch #5 plus
the MODE_SI hunk from patch #3.  Then 24hrs after that the remaining bits of
patch #3.

I think that covers the whole series.

jeff


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 6/6] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV
  2020-03-12  3:53   ` Jeff Law
@ 2020-03-12 10:52     ` H.J. Lu
  0 siblings, 0 replies; 16+ messages in thread
From: H.J. Lu @ 2020-03-12 10:52 UTC (permalink / raw)
  To: Jeffrey Law; +Cc: GCC Patches, Jakub Jelinek, Jan Hubicka, Uros Bizjak

[-- Attachment #1: Type: text/plain, Size: 1522 bytes --]

On Wed, Mar 11, 2020 at 8:53 PM Jeff Law <law@redhat.com> wrote:
>
> On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> > There is no need to set mode attribute to XImode since ix86_output_ssemov
> > can properly encode xmm16-xmm31 registers with and without AVX512VL.
> >
> > Remove ext_sse_reg_operand since it is no longer needed.
> >
> >       PR target/89229
> >       * config/i386/i386.c (ix86_output_ssemov): Handle MODE_V1DF and
> >       MODE_V2SF.
> >       * config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Call
> >       ix86_output_ssemov for TYPE_SSEMOV.  Remove ext_sse_reg_operand
> >       check.
> >       * config/i386/predicates.md (ext_sse_reg_operand): Removed.
> This is OK.   I think once this is in, patch #2 becomes OK because this patch
> adds V2SF handling in ix86_output_ssemov.

I need to take out the ext_sse_reg_operand removal since it is still being
used.  I added MODE_DI to ix86_output_ssemov.

> Similarly I think patch #4 is OK once this one goes in since it adds V1DF as
> well.
>
> So perhaps an integration plan would be to immediately install #6, followed 24hrs
> later by patch #4, then 24hrs after patch #2.
>
> Then we can work on patch #5 and patch #3 where I think we go with patch #5 plus
> the MODE_SI hunk from patch #3.  Then 24hrs after that the remaining bits of
> patch #3.

I am enclosing the 5 remaining patches, updated.  I will check in the first
one now and then check in the rest, one patch every 24hrs.

> I think that covers the whole series.
>

Thanks.

-- 
H.J.

[-- Attachment #2: 0001-i386-Use-ix86_output_ssemov-for-MMX-TYPE_SSEMOV.patch --]
[-- Type: text/x-patch, Size: 3294 bytes --]

From 555880dad82a9b511945250c0436ee05c4962f65 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 14 Feb 2020 11:07:34 -0800
Subject: [PATCH 1/5] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV

There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

	PR target/89229
	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_DI,
	MODE_V1DF and MODE_V2SF.
	* config/i386/mmx.md (MMXMODE:*mov<mode>_internal): Call
	ix86_output_ssemov for TYPE_SSEMOV.  Remove ext_sse_reg_operand
	check.
---
 gcc/config/i386/i386.c | 19 +++++++++++++++++++
 gcc/config/i386/mmx.md | 29 ++---------------------------
 2 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7bbfbb4c5a7..6d83855692f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5118,6 +5118,25 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
     case MODE_V4SF:
       return ix86_get_ssemov (operands, 16, insn_mode, mode);
 
+    case MODE_DI:
+      /* Handle broken assemblers that require movd instead of movq. */
+      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
+	  && (GENERAL_REG_P (operands[0])
+	      || GENERAL_REG_P (operands[1])))
+	return "%vmovd\t{%1, %0|%0, %1}";
+      else
+	return "%vmovq\t{%1, %0|%0, %1}";
+
+    case MODE_V1DF:
+      gcc_assert (!TARGET_AVX);
+      return "movlpd\t{%1, %0|%0, %1}";
+
+    case MODE_V2SF:
+      if (TARGET_AVX && REG_P (operands[0]))
+	return "vmovlps\t{%1, %d0|%d0, %1}";
+      else
+	return "%vmovlps\t{%1, %0|%0, %1}";
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e1c8b0af4c7..c3f195bb34a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -118,29 +118,7 @@ (define_insn "*mov<mode>_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_DI:
-	  /* Handle broken assemblers that require movd instead of movq.  */
-	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
-	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-	    return "%vmovd\t{%1, %0|%0, %1}";
-	  return "%vmovq\t{%1, %0|%0, %1}";
-	case MODE_TI:
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_XI:
-	  return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
-
-	case MODE_V2SF:
-	  if (TARGET_AVX && REG_P (operands[0]))
-	    return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
-	  return "%vmovlps\t{%1, %0|%0, %1}";
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -189,10 +167,7 @@ (define_insn "*mov<mode>_internal"
      (cond [(eq_attr "alternative" "2")
 	      (const_string "SI")
 	    (eq_attr "alternative" "11,12")
-	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
-			  (match_operand 1 "ext_sse_reg_operand"))
-			(const_string "XI")
-		     (match_test "<MODE>mode == V2SFmode")
+	      (cond [(match_test "<MODE>mode == V2SFmode")
 		       (const_string "V4SF")
 		     (ior (not (match_test "TARGET_SSE2"))
 			  (match_test "optimize_function_for_size_p (cfun)"))
-- 
2.24.1


[-- Attachment #3: 0002-i386-Use-ix86_output_ssemov-for-DFmode-TYPE_SSEMOV.patch --]
[-- Type: text/x-patch, Size: 5796 bytes --]

From d02ae1b84bb6dcc30230808a57e12d49d6f4a853 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 14 Feb 2020 10:32:06 -0800
Subject: [PATCH 2/5] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV

There is no need to set mode attribute to XImode nor V8DFmode since
ix86_output_ssemov can properly encode xmm16-xmm31 registers with and
without AVX512VL.

gcc/

	PR target/89229
	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_DF.
	* config/i386/i386.md (*movdf_internal): Call ix86_output_ssemov
	for TYPE_SSEMOV.  Remove TARGET_AVX512F, TARGET_PREFER_AVX256,
	TARGET_AVX512VL and ext_sse_reg_operand check.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-4a.c: New test.
	* gcc.target/i386/pr89229-4b.c: Likewise.
	* gcc.target/i386/pr89229-4c.c: Likewise.
---
 gcc/config/i386/i386.c                     |  6 +++
 gcc/config/i386/i386.md                    | 44 ++--------------------
 gcc/testsuite/gcc.target/i386/pr89229-4a.c | 16 ++++++++
 gcc/testsuite/gcc.target/i386/pr89229-4b.c |  7 ++++
 gcc/testsuite/gcc.target/i386/pr89229-4c.c |  6 +++
 5 files changed, 38 insertions(+), 41 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4c.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6d83855692f..924f9558b24 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5127,6 +5127,12 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
       else
 	return "%vmovq\t{%1, %0|%0, %1}";
 
+    case MODE_DF:
+      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
+	return "vmovsd\t{%d1, %0|%0, %d1}";
+      else
+	return "%vmovsd\t{%1, %0|%0, %1}";
+
     case MODE_V1DF:
       gcc_assert (!TARGET_AVX);
       return "movlpd\t{%1, %0|%0, %1}";
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8b5ae34ee11..0f57f939cc3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3355,37 +3355,7 @@ (define_insn "*movdf_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_DF:
-	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-	    return "vmovsd\t{%d1, %0|%0, %d1}";
-	  return "%vmovsd\t{%1, %0|%0, %1}";
-
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-	case MODE_V8DF:
-	  return "vmovapd\t{%g1, %g0|%g0, %g1}";
-	case MODE_V2DF:
-	  return "%vmovapd\t{%1, %0|%0, %1}";
-
-	case MODE_V2SF:
-	  gcc_assert (!TARGET_AVX);
-	  return "movlps\t{%1, %0|%0, %1}";
-	case MODE_V1DF:
-	  gcc_assert (!TARGET_AVX);
-	  return "movlpd\t{%1, %0|%0, %1}";
-
-	case MODE_DI:
-	  /* Handle broken assemblers that require movd instead of movq.  */
-	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
-	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-	    return "%vmovd\t{%1, %0|%0, %1}";
-	  return "%vmovq\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     default:
       gcc_unreachable ();
@@ -3439,10 +3409,7 @@ (define_insn "*movdf_internal"
 
 	       /* xorps is one byte shorter for non-AVX targets.  */
 	       (eq_attr "alternative" "12,16")
-		 (cond [(and (match_test "TARGET_AVX512F")
-			     (not (match_test "TARGET_PREFER_AVX256")))
-			  (const_string "XI")
-			(match_test "TARGET_AVX")
+		 (cond [(match_test "TARGET_AVX")
 			  (const_string "V2DF")
 			(ior (not (match_test "TARGET_SSE2"))
 			     (match_test "optimize_function_for_size_p (cfun)"))
@@ -3458,12 +3425,7 @@ (define_insn "*movdf_internal"
 
 	       /* movaps is one byte shorter for non-AVX targets.  */
 	       (eq_attr "alternative" "13,17")
-		 (cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
-				  (not (match_test "TARGET_AVX512VL")))
-			     (ior (match_operand 0 "ext_sse_reg_operand")
-				  (match_operand 1 "ext_sse_reg_operand")))
-			  (const_string "V8DF")
-			(match_test "TARGET_AVX")
+		 (cond [(match_test "TARGET_AVX")
 			  (const_string "DF")
 			(ior (not (match_test "TARGET_SSE2"))
 			     (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4a.c b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
new file mode 100644
index 00000000000..5bc10d25619
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern double d;
+
+void
+foo1 (double x)
+{
+  register double xmm16 __asm ("xmm16") = x;
+  asm volatile ("" : "+v" (xmm16));
+  register double xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "vmovapd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4b.c b/gcc/testsuite/gcc.target/i386/pr89229-4b.c
new file mode 100644
index 00000000000..228aeb7b580
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-4a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
+/* { dg-final { scan-assembler-not "vmovapd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4c.c b/gcc/testsuite/gcc.target/i386/pr89229-4c.c
new file mode 100644
index 00000000000..537c82fbc54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-4a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1


[-- Attachment #4: 0003-i386-Use-ix86_output_ssemov-for-DImode-TYPE_SSEMOV.patch --]
[-- Type: text/x-patch, Size: 4604 bytes --]

From 762e781167e1f1584e35087c301b1decc6794d13 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 14 Feb 2020 10:16:34 -0800
Subject: [PATCH 3/5] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV

There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

	PR target/89229
	* config/i386/i386.md (*movdi_internal): Call ix86_output_ssemov
	for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
	check.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-5a.c: New test.
	* gcc.target/i386/pr89229-5b.c: Likewise.
	* gcc.target/i386/pr89229-5c.c: Likewise.
---
 gcc/config/i386/i386.md                    | 31 ++--------------------
 gcc/testsuite/gcc.target/i386/pr89229-5a.c | 17 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr89229-5b.c |  6 +++++
 gcc/testsuite/gcc.target/i386/pr89229-5c.c |  7 +++++
 4 files changed, 32 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5c.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0f57f939cc3..6fa5db0a452 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2054,31 +2054,7 @@ (define_insn "*movdi_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_DI:
-	  /* Handle broken assemblers that require movd instead of movq.  */
-	  if (!HAVE_AS_IX86_INTERUNIT_MOVQ
-	      && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-	    return "%vmovd\t{%1, %0|%0, %1}";
-	  return "%vmovq\t{%1, %0|%0, %1}";
-
-	case MODE_TI:
-	  /* Handle AVX512 registers set.  */
-	  if (EXT_REX_SSE_REG_P (operands[0])
-	      || EXT_REX_SSE_REG_P (operands[1]))
-	    return "vmovdqa64\t{%1, %0|%0, %1}";
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
-
-	case MODE_V2SF:
-	  gcc_assert (!TARGET_AVX);
-	  return "movlps\t{%1, %0|%0, %1}";
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     case TYPE_SSECVT:
       if (SSE_REG_P (operands[0]))
@@ -2164,10 +2140,7 @@ (define_insn "*movdi_internal"
      (cond [(eq_attr "alternative" "2")
 	      (const_string "SI")
 	    (eq_attr "alternative" "12,13")
-	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
-			  (match_operand 1 "ext_sse_reg_operand"))
-		       (const_string "TI")
-		     (match_test "TARGET_AVX")
+	      (cond [(match_test "TARGET_AVX")
 		       (const_string "TI")
 		     (ior (not (match_test "TARGET_SSE2"))
 			  (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5a.c b/gcc/testsuite/gcc.target/i386/pr89229-5a.c
new file mode 100644
index 00000000000..cb9b071e873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+extern long long i;
+
+long long
+foo1 (void)
+{
+  register long long xmm16 __asm ("xmm16") = i;
+  asm volatile ("" : "+v" (xmm16));
+  register long long xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  return xmm17;
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5b.c b/gcc/testsuite/gcc.target/i386/pr89229-5b.c
new file mode 100644
index 00000000000..261f2e12e8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-5a.c"
+
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5c.c b/gcc/testsuite/gcc.target/i386/pr89229-5c.c
new file mode 100644
index 00000000000..5fe537f47cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-5a.c"
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1


[-- Attachment #5: 0004-i386-Use-ix86_output_ssemov-for-SFmode-TYPE_SSEMOV.patch --]
[-- Type: text/x-patch, Size: 5173 bytes --]

From 9bff24ba58a91fac044582bc03d3e0ab121b8067 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 14 Feb 2020 10:38:47 -0800
Subject: [PATCH 4/5] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV

There is no need to set mode attribute to V16SFmode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

	PR target/89229
	* config/i386/i386.c (ix86_output_ssemov): Handle MODE_SI and
	MODE_SF.
	* config/i386/i386.md (*movsf_internal): Call ix86_output_ssemov
	for TYPE_SSEMOV.  Remove TARGET_PREFER_AVX256, TARGET_AVX512VL
	and ext_sse_reg_operand check.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-6a.c: New test.
	* gcc.target/i386/pr89229-6b.c: Likewise.
	* gcc.target/i386/pr89229-6c.c: Likewise.
---
 gcc/config/i386/i386.c                     |  9 ++++++++
 gcc/config/i386/i386.md                    | 26 ++--------------------
 gcc/testsuite/gcc.target/i386/pr89229-6a.c | 16 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr89229-6b.c |  6 +++++
 gcc/testsuite/gcc.target/i386/pr89229-6c.c |  6 +++++
 5 files changed, 39 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6c.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 924f9558b24..d1910b42b1b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5127,12 +5127,21 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
       else
 	return "%vmovq\t{%1, %0|%0, %1}";
 
+    case MODE_SI:
+      return "%vmovd\t{%1, %0|%0, %1}";
+
     case MODE_DF:
       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
 	return "vmovsd\t{%d1, %0|%0, %d1}";
       else
 	return "%vmovsd\t{%1, %0|%0, %1}";
 
+    case MODE_SF:
+      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
+	return "vmovss\t{%d1, %0|%0, %d1}";
+      else
+	return "%vmovss\t{%1, %0|%0, %1}";
+
     case MODE_V1DF:
       gcc_assert (!TARGET_AVX);
       return "movlpd\t{%1, %0|%0, %1}";
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6fa5db0a452..af39f90c68e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3490,24 +3490,7 @@ (define_insn "*movsf_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_SF:
-	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-	    return "vmovss\t{%d1, %0|%0, %d1}";
-	  return "%vmovss\t{%1, %0|%0, %1}";
-
-	case MODE_V16SF:
-	  return "vmovaps\t{%g1, %g0|%g0, %g1}";
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-
-	case MODE_SI:
-	  return "%vmovd\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     case TYPE_MMXMOV:
       switch (get_attr_mode (insn))
@@ -3579,12 +3562,7 @@ (define_insn "*movsf_internal"
 		  better to maintain the whole registers in single format
 		  to avoid problems on using packed logical operations.  */
 	       (eq_attr "alternative" "6")
-		 (cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
-				  (not (match_test "TARGET_AVX512VL")))
-			     (ior (match_operand 0 "ext_sse_reg_operand")
-				  (match_operand 1 "ext_sse_reg_operand")))
-			  (const_string "V16SF")
-			(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+		 (cond [(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
 			     (match_test "TARGET_SSE_SPLIT_REGS"))
 			  (const_string "V4SF")
 		       ]
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6a.c b/gcc/testsuite/gcc.target/i386/pr89229-6a.c
new file mode 100644
index 00000000000..856115b2f5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-6a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern float d;
+
+void
+foo1 (float x)
+{
+  register float xmm16 __asm ("xmm16") = x;
+  asm volatile ("" : "+v" (xmm16));
+  register float xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6b.c b/gcc/testsuite/gcc.target/i386/pr89229-6b.c
new file mode 100644
index 00000000000..a74f7169e6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-6b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-6a.c"
+
+/* { dg-final { scan-assembler-times "vmovaps\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6c.c b/gcc/testsuite/gcc.target/i386/pr89229-6c.c
new file mode 100644
index 00000000000..7a4d254670c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-6c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-6a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1


[-- Attachment #6: 0005-i386-Use-ix86_output_ssemov-for-SImode-TYPE_SSEMOV.patch --]
[-- Type: text/x-patch, Size: 4983 bytes --]

From f3151bd92c342ddddf95f54c2c1a2bad57ea56b1 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 14 Feb 2020 10:21:17 -0800
Subject: [PATCH 5/5] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV

There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

Remove ext_sse_reg_operand since it is no longer needed.

gcc/

	PR target/89229
	* config/i386/i386.md (*movsi_internal): Call ix86_output_ssemov
	for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
	check.
	* config/i386/predicates.md (ext_sse_reg_operand): Removed.

gcc/testsuite/

	PR target/89229
	* gcc.target/i386/pr89229-7a.c: New test.
	* gcc.target/i386/pr89229-7b.c: Likewise.
	* gcc.target/i386/pr89229-7c.c: Likewise.
---
 gcc/config/i386/i386.md                    | 25 ++--------------------
 gcc/config/i386/predicates.md              |  5 -----
 gcc/testsuite/gcc.target/i386/pr89229-7a.c | 17 +++++++++++++++
 gcc/testsuite/gcc.target/i386/pr89229-7b.c |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr89229-7c.c |  7 ++++++
 5 files changed, 32 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7c.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index af39f90c68e..3051624d89f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2261,25 +2261,7 @@ (define_insn "*movsi_internal"
       gcc_unreachable ();
 
     case TYPE_SSEMOV:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_SI:
-          return "%vmovd\t{%1, %0|%0, %1}";
-	case MODE_TI:
-	  return "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_XI:
-	  return "vmovdqa32\t{%g1, %g0|%g0, %g1}";
-
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-
-	case MODE_SF:
-	  gcc_assert (!TARGET_AVX);
-          return "movss\t{%1, %0|%0, %1}";
-
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
 
     case TYPE_MMX:
       return "pxor\t%0, %0";
@@ -2345,10 +2327,7 @@ (define_insn "*movsi_internal"
      (cond [(eq_attr "alternative" "2,3")
 	      (const_string "DI")
 	    (eq_attr "alternative" "8,9")
-	      (cond [(ior (match_operand 0 "ext_sse_reg_operand")
-			  (match_operand 1 "ext_sse_reg_operand"))
-		       (const_string "XI")
-		     (match_test "TARGET_AVX")
+	      (cond [(match_test "TARGET_AVX")
 		       (const_string "TI")
 		     (ior (not (match_test "TARGET_SSE2"))
 			  (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 1119366d54e..71f4cb1193c 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -61,11 +61,6 @@ (define_predicate "sse_reg_operand"
   (and (match_code "reg")
        (match_test "SSE_REGNO_P (REGNO (op))")))
 
-;; True if the operand is an AVX-512 new register.
-(define_predicate "ext_sse_reg_operand"
-  (and (match_code "reg")
-       (match_test "EXT_REX_SSE_REGNO_P (REGNO (op))")))
-
 ;; Return true if op is a QImode register.
 (define_predicate "any_QIreg_operand"
   (and (match_code "reg")
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7a.c b/gcc/testsuite/gcc.target/i386/pr89229-7a.c
new file mode 100644
index 00000000000..fd56f447016
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern int i;
+
+int
+foo1 (void)
+{
+  register int xmm16 __asm ("xmm16") = i;
+  asm volatile ("" : "+v" (xmm16));
+  register int xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  return xmm17;
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7b.c b/gcc/testsuite/gcc.target/i386/pr89229-7b.c
new file mode 100644
index 00000000000..d3a56e6e2b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-7a.c"
+
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7c.c b/gcc/testsuite/gcc.target/i386/pr89229-7c.c
new file mode 100644
index 00000000000..e14634e1edd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-7a.c"
+
+/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-03-12 10:53 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-29 14:16 V2 [PATCH 0/6] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move H.J. Lu
2020-02-29 14:16 ` [PATCH 1/6] i386: Properly encode vector registers in " H.J. Lu
2020-03-05 23:47   ` Jeff Law
2020-03-08 12:04     ` [COMMITTED, PATCH] gcc.target/i386/pr89229-3c.c: Include "pr89229-3a.c" H.J. Lu
2020-03-10 12:35     ` [PATCH 1/6] i386: Properly encode vector registers in vector move H.J. Lu
2020-02-29 14:16 ` [PATCH 4/6] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV H.J. Lu
2020-03-12  3:41   ` Jeff Law
2020-02-29 14:16 ` [PATCH 2/6] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV H.J. Lu
2020-03-12  3:32   ` Jeff Law
2020-02-29 14:16 ` [PATCH 5/6] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV H.J. Lu
2020-03-12  3:46   ` Jeff Law
2020-02-29 14:16 ` [PATCH 6/6] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV H.J. Lu
2020-03-12  3:53   ` Jeff Law
2020-03-12 10:52     ` H.J. Lu
2020-02-29 15:30 ` [PATCH 3/6] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV H.J. Lu
2020-03-12  3:39   ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).