From: Hongyu Wang <hongyu.wang@intel.com>
To: gcc-patches@gcc.gnu.org
Cc: hongtao.liu@intel.com, ubizjak@gmail.com, hubicka@ucw.cz,
	vmakarov@redhat.com, jakub@redhat.com
Subject: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns
Date: Thu, 31 Aug 2023 16:20:19 +0800
Message-ID: <20230831082024.314097-9-hongyu.wang@intel.com>
In-Reply-To: <20230831082024.314097-1-hongyu.wang@intel.com>

For vector move insns like vmovdqa/vmovdqu, their EVEX counterparts
require an explicit suffix 64/32/16/8.  These suffixed instructions
are unavailable without AVX10.1 or AVX512F, so for AVX2+APX_F we
select vmovaps/vmovups for vector load/store insns that contain an
EGPR.
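
For reference, here is a minimal standalone sketch of the selection
rule the i386.cc hunks implement (the function name and parameters
below are ours for illustration only; the real logic lives in
ix86_get_ssemov and additionally checks TARGET_AVX512BW for the
8/16-bit element cases):

  #include <stdbool.h>

  /* Sketch only: mirrors the new egpr_p fallback in ix86_get_ssemov.
     evex_reg_p   - 64-byte move, or an operand in xmm16-xmm31
     egpr_p       - TARGET_APX_EGPR and an r16-r31 GPR appears in an
		    operand
     misaligned_p - the memory operand is not known to be aligned.  */
  static const char *
  pick_vector_mov (bool evex_reg_p, bool egpr_p, bool misaligned_p)
  {
    if (evex_reg_p)
      /* EVEX-only registers force the suffixed EVEX mnemonics.  */
      return misaligned_p ? "vmovdqu64" : "vmovdqa64";
    else if (egpr_p)
      /* VEX vmovdqu/vmovdqa cannot address r16-r31, and the suffixed
	 EVEX forms need AVX512F, so fall back to vmovups/vmovaps.  */
      return misaligned_p ? "%vmovups" : "%vmovaps";
    else
      return misaligned_p ? "%vmovdqu" : "%vmovdqa";
  }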

gcc/ChangeLog:

	* config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
	adjust mnemonic for vmovdqu/vmovdqa.
	* config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suf>_0):
	Check if egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
	(avx_vec_concat<mode>): Likewise, and restrict alternative 0
	to noavx512f in the isa attribute.
---
 gcc/config/i386/i386.cc | 31 ++++++++++++++++++++++++++++++-
 gcc/config/i386/sse.md  | 34 ++++++++++++++++++++++++----------
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 412f3aefc43..f5d642948bc 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5469,6 +5469,11 @@ ix86_get_ssemov (rtx *operands, unsigned size,
   bool evex_reg_p = (size == 64
 		     || EXT_REX_SSE_REG_P (operands[0])
 		     || EXT_REX_SSE_REG_P (operands[1]));
+
+  bool egpr_p = (TARGET_APX_EGPR
+		 && (x86_extended_rex2reg_mentioned_p (operands[0])
+		     || x86_extended_rex2reg_mentioned_p (operands[1])));
+
   machine_mode scalar_mode;
 
   const char *opcode = NULL;
@@ -5547,6 +5552,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 			 ? "vmovdqu16"
 			 : "vmovdqu64")
 		      : "vmovdqa64");
+	  else if (egpr_p)
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu16"
+			 : "%vmovups")
+		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
@@ -5563,6 +5574,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 	case E_TFmode:
 	  if (evex_reg_p)
 	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+	  else if (egpr_p)
+	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
 	  else
 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
 	  break;
@@ -5581,6 +5594,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 			 ? "vmovdqu8"
 			 : "vmovdqu64")
 		      : "vmovdqa64");
+	  else if (egpr_p)
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu8"
+			 : "%vmovups")
+		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
@@ -5589,12 +5608,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 		      : "%vmovdqa");
 	  break;
 	case E_HImode:
 	  if (evex_reg_p)
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
 			 ? "vmovdqu16"
 			 : "vmovdqu64")
 		      : "vmovdqa64");
+	  else if (egpr_p)
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu16"
+			 : "%vmovups")
+		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
@@ -5605,6 +5630,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 	case E_SImode:
 	  if (evex_reg_p)
 	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+	  else if (egpr_p)
+	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
 	  else
 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
 	  break;
@@ -5613,6 +5640,8 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 	case E_OImode:
 	  if (evex_reg_p)
 	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+	  else if (egpr_p)
+	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
 	  else
 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
 	  break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 192e746fda3..bd6674d34f9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18918,6 +18918,12 @@ (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
 {
   if (which_alternative == 0)
     return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
+  bool egpr_used = (TARGET_APX_EGPR
+		    && x86_extended_rex2reg_mentioned_p (operands[2]));
+  const char *align_templ = egpr_used ? "vmovdqa\t{%2, %x0|%x0, %2}"
+				      : "vmovaps\t{%2, %x0|%x0, %2}";
+  const char *unalign_templ = egpr_used ? "vmovdqu\t{%2, %x0|%x0, %2}"
+					: "vmovups\t{%2, %x0|%x0, %2}";
   switch (<MODE>mode)
     {
     case E_V8DFmode:
@@ -18933,17 +18939,17 @@ (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
     case E_V8DImode:
       if (misaligned_operand (operands[2], <ssequartermode>mode))
 	return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
-				      : "vmovdqu\t{%2, %x0|%x0, %2}";
+				      : unalign_templ;
       else
 	return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
-				      : "vmovdqa\t{%2, %x0|%x0, %2}";
+				      : align_templ;
     case E_V16SImode:
       if (misaligned_operand (operands[2], <ssequartermode>mode))
 	return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
-				      : "vmovdqu\t{%2, %x0|%x0, %2}";
+				      : unalign_templ;
       else
 	return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
-				      : "vmovdqa\t{%2, %x0|%x0, %2}";
+				      : align_templ;
     default:
       gcc_unreachable ();
     }
@@ -27652,11 +27658,13 @@ (define_insn "avx_vec_concat<mode>"
   [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
 	(vec_concat:V_256_512
 	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
-	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
+	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xBt,vm,C,C")))]
   "TARGET_AVX
    && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
        || !MEM_P (operands[1]))"
 {
+  bool egpr_used = (TARGET_APX_EGPR
+		    && x86_extended_rex2reg_mentioned_p (operands[1]));
   switch (which_alternative)
     {
     case 0:
@@ -27704,7 +27712,8 @@ (define_insn "avx_vec_concat<mode>"
 	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
 	    {
 	      if (which_alternative == 2)
-		return "vmovdqu\t{%1, %t0|%t0, %1}";
+		return egpr_used ? "vmovups\t{%1, %t0|%t0, %1}"
+				 : "vmovdqu\t{%1, %t0|%t0, %1}";
 	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
 		return "vmovdqu64\t{%1, %t0|%t0, %1}";
 	      else
@@ -27713,7 +27722,8 @@ (define_insn "avx_vec_concat<mode>"
 	  else
 	    {
 	      if (which_alternative == 2)
-		return "vmovdqa\t{%1, %t0|%t0, %1}";
+		return egpr_used ? "vmovaps\t{%1, %t0|%t0, %1}"
+				 : "vmovdqa\t{%1, %t0|%t0, %1}";
 	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
 		return "vmovdqa64\t{%1, %t0|%t0, %1}";
 	      else
@@ -27723,7 +27733,8 @@ (define_insn "avx_vec_concat<mode>"
 	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
 	    {
 	      if (which_alternative == 2)
-		return "vmovdqu\t{%1, %x0|%x0, %1}";
+		return egpr_used ? "vmovups\t{%1, %x0|%x0, %1}"
+				 : "vmovdqu\t{%1, %x0|%x0, %1}";
 	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
 		return "vmovdqu64\t{%1, %x0|%x0, %1}";
 	      else
@@ -27732,7 +27743,8 @@ (define_insn "avx_vec_concat<mode>"
 	  else
 	    {
 	      if (which_alternative == 2)
-		return "vmovdqa\t{%1, %x0|%x0, %1}";
+		return egpr_used ? "vmovaps\t{%1, %x0|%x0, %1}"
+				 : "vmovdqa\t{%1, %x0|%x0, %1}";
 	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
 		return "vmovdqa64\t{%1, %x0|%x0, %1}";
 	      else
@@ -27745,7 +27757,9 @@ (define_insn "avx_vec_concat<mode>"
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+  [(set_attr "isa" "noavx512f,avx512f,*,*")
+   (set_attr "gpr32" "0,1,1,1")
+   (set_attr "type" "sselog,sselog,ssemov,ssemov")
    (set_attr "prefix_extra" "1,1,*,*")
    (set_attr "length_immediate" "1,1,*,*")
    (set_attr "prefix" "maybe_evex")
-- 
2.31.1


