* [gcc(refs/vendors/ix86/heads/apx)] [APX EGPR] Handle GPR16 only vector move insns
@ 2023-09-26  5:02 Hongyu Wang
From: Hongyu Wang @ 2023-09-26  5:02 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d9ff30171d5befed99af2483793a139ededde0ea

commit d9ff30171d5befed99af2483793a139ededde0ea
Author: Hongyu Wang <hongyu.wang@intel.com>
Date:   Thu Aug 17 08:30:04 2023 +0800

    [APX EGPR] Handle GPR16 only vector move insns
    
    For vector move insns like vmovdqa/vmovdqu, their EVEX counterparts
    require an explicit suffix 64/32/16/8. The usage of these instructions
    is prohibited under AVX10_1 or AVX512F, so we select
    vmovaps/vmovups for vector load/store insns that contain EGPR if
    there is no AVX512VL, and keep the original move insn selection
    otherwise.
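
    As an illustration only (not part of the patch): a minimal C++ sketch
    of the selection rule above, shown for SImode elements. The function
    and parameter names here are made up for clarity, and the %v
    output-template prefixes used in the real i386.cc code are omitted.

        /* Return the vector move mnemonic for SImode elements.  */
        static const char *
        pick_simode_move (bool evex_reg_p, bool egpr_p, bool has_avx512vl,
                          bool misaligned_p)
        {
          /* An EGPR operand with AVX512VL can still use the suffixed
             EVEX forms.  */
          if (evex_reg_p || (egpr_p && has_avx512vl))
            return misaligned_p ? "vmovdqu32" : "vmovdqa32";
          /* Without AVX512VL, an EGPR operand must avoid the GPR16-only
             vmovdqa/vmovdqu encodings, so fall back to vmovaps/vmovups.  */
          if (egpr_p)
            return misaligned_p ? "vmovups" : "vmovaps";
          /* No EGPR involved: keep the original selection.  */
          return misaligned_p ? "vmovdqu" : "vmovdqa";
        }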
    
    gcc/ChangeLog:
    
            * config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
            adjust mnemonic for vmovdqu/vmovdqa.
            * config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suf>_0):
            Check if egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
            (avx_vec_concat<mode>): Likewise, and separate alternative 0 to
            avx_noavx512f.
    
    Co-authored-by: Kong Lingling <lingling.kong@intel.com>
    Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>

Diff:
---
 gcc/config/i386/i386.cc | 42 ++++++++++++++++++++++++++++++++++++------
 gcc/config/i386/sse.md  | 34 ++++++++++++++++++++++++----------
 2 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e9c07276168..396d58c23a2 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5478,6 +5478,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
   bool evex_reg_p = (size == 64
 		     || EXT_REX_SSE_REG_P (operands[0])
 		     || EXT_REX_SSE_REG_P (operands[1]));
+
+  bool egpr_p = (TARGET_APX_EGPR
+		 && (x86_extended_rex2reg_mentioned_p (operands[0])
+		     || x86_extended_rex2reg_mentioned_p (operands[1])));
+  bool egpr_vl = egpr_p && TARGET_AVX512VL;
+
   machine_mode scalar_mode;
 
   const char *opcode = NULL;
@@ -5550,12 +5556,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 	{
 	case E_HFmode:
 	case E_BFmode:
-	  if (evex_reg_p)
+	  if (evex_reg_p || egpr_vl)
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
 			 ? "vmovdqu16"
 			 : "vmovdqu64")
 		      : "vmovdqa64");
+	  else if (egpr_p)
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu16"
+			 : "%vmovups")
+		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
@@ -5570,8 +5582,10 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
 	  break;
 	case E_TFmode:
-	  if (evex_reg_p)
+	  if (evex_reg_p || egpr_vl)
 	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+	  else if (egpr_p)
+	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
 	  else
 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
 	  break;
@@ -5584,12 +5598,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
       switch (scalar_mode)
 	{
 	case E_QImode:
-	  if (evex_reg_p)
+	  if (evex_reg_p || egpr_vl)
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
 			 ? "vmovdqu8"
 			 : "vmovdqu64")
 		      : "vmovdqa64");
+	  else if (egpr_p)
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu8"
+			 : "%vmovups")
+		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
@@ -5598,12 +5618,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 		      : "%vmovdqa");
 	  break;
 	case E_HImode:
-	  if (evex_reg_p)
+	  if (evex_reg_p || egpr_vl)
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
 			 ? "vmovdqu16"
 			 : "vmovdqu64")
 		      : "vmovdqa64");
+	  else if (egpr_p)
+	    opcode = (misaligned_p
+		      ? (TARGET_AVX512BW
+			 ? "vmovdqu16"
+			 : "%vmovups")
+		      : "%vmovaps");
 	  else
 	    opcode = (misaligned_p
 		      ? (TARGET_AVX512BW
@@ -5612,16 +5638,20 @@ ix86_get_ssemov (rtx *operands, unsigned size,
 		      : "%vmovdqa");
 	  break;
 	case E_SImode:
-	  if (evex_reg_p)
+	  if (evex_reg_p || egpr_vl)
 	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+	  else if (egpr_p)
+	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
 	  else
 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
 	  break;
 	case E_DImode:
 	case E_TImode:
 	case E_OImode:
-	  if (evex_reg_p)
+	  if (evex_reg_p || egpr_vl)
 	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+	  else if (egpr_p)
+	    opcode = misaligned_p ? "%vmovups" : "%vmovaps";
 	  else
 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
 	  break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 80b43fd7db7..256b0eedbbb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18912,6 +18912,12 @@
 {
   if (which_alternative == 0)
     return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
+  bool egpr_used = (TARGET_APX_EGPR
+		    && x86_extended_rex2reg_mentioned_p (operands[2]));
+  const char *align_templ = egpr_used ? "vmovaps\t{%2, %x0|%x0, %2}"
+				      : "vmovdqa\t{%2, %x0|%x0, %2}";
+  const char *unalign_templ = egpr_used ? "vmovups\t{%2, %x0|%x0, %2}"
+					: "vmovdqu\t{%2, %x0|%x0, %2}";
   switch (<MODE>mode)
     {
     case E_V8DFmode:
@@ -18927,17 +18933,17 @@
     case E_V8DImode:
       if (misaligned_operand (operands[2], <ssequartermode>mode))
 	return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
-				      : "vmovdqu\t{%2, %x0|%x0, %2}";
+				      : unalign_templ;
       else
 	return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
-				      : "vmovdqa\t{%2, %x0|%x0, %2}";
+				      : align_templ;
     case E_V16SImode:
       if (misaligned_operand (operands[2], <ssequartermode>mode))
 	return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
-				      : "vmovdqu\t{%2, %x0|%x0, %2}";
+				      : unalign_templ;
       else
 	return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
-				      : "vmovdqa\t{%2, %x0|%x0, %2}";
+				      : align_templ;
     default:
       gcc_unreachable ();
     }
@@ -27661,11 +27667,13 @@
   [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
 	(vec_concat:V_256_512
 	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
-	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
+	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xBt,vm,C,C")))]
   "TARGET_AVX
    && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
        || !MEM_P (operands[1]))"
 {
+  bool egpr_used = (TARGET_APX_EGPR
+		    && x86_extended_rex2reg_mentioned_p (operands[1]));
   switch (which_alternative)
     {
     case 0:
@@ -27713,7 +27721,8 @@
 	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
 	    {
 	      if (which_alternative == 2)
-		return "vmovdqu\t{%1, %t0|%t0, %1}";
+		return egpr_used ? "vmovups\t{%1, %t0|%t0, %1}"
+				 : "vmovdqu\t{%1, %t0|%t0, %1}";
 	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
 		return "vmovdqu64\t{%1, %t0|%t0, %1}";
 	      else
@@ -27722,7 +27731,8 @@
 	  else
 	    {
 	      if (which_alternative == 2)
-		return "vmovdqa\t{%1, %t0|%t0, %1}";
+		return egpr_used ? "vmovaps\t{%1, %t0|%t0, %1}"
+				 : "vmovdqa\t{%1, %t0|%t0, %1}";
 	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
 		return "vmovdqa64\t{%1, %t0|%t0, %1}";
 	      else
@@ -27732,7 +27742,8 @@
 	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
 	    {
 	      if (which_alternative == 2)
-		return "vmovdqu\t{%1, %x0|%x0, %1}";
+		return egpr_used ? "vmovups\t{%1, %x0|%x0, %1}"
+				 : "vmovdqu\t{%1, %x0|%x0, %1}";
 	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
 		return "vmovdqu64\t{%1, %x0|%x0, %1}";
 	      else
@@ -27741,7 +27752,8 @@
 	  else
 	    {
 	      if (which_alternative == 2)
-		return "vmovdqa\t{%1, %x0|%x0, %1}";
+		return egpr_used ? "vmovaps\t{%1, %x0|%x0, %1}"
+				 : "vmovdqa\t{%1, %x0|%x0, %1}";
 	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
 		return "vmovdqa64\t{%1, %x0|%x0, %1}";
 	      else
@@ -27754,7 +27766,9 @@
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+  [(set_attr "isa" "noavx512f,avx512f,*,*")
+   (set_attr "gpr32" "0,1,1,1")
+   (set_attr "type" "sselog,sselog,ssemov,ssemov")
    (set_attr "prefix_extra" "1,1,*,*")
    (set_attr "length_immediate" "1,1,*,*")
    (set_attr "prefix" "maybe_evex")
