From: Hongyu Wang <hongyu.wang@intel.com>
To: gcc-patches@gcc.gnu.org
Cc: ubizjak@gmail.com, vmakarov@redhat.com, jakub@redhat.com,
Kong Lingling <lingling.kong@intel.com>,
Hongtao Liu <hongtao.liu@intel.com>
Subject: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns
Date: Fri, 22 Sep 2023 18:56:26 +0800
Message-ID: <20230922105631.2298849-9-hongyu.wang@intel.com>
In-Reply-To: <20230922105631.2298849-1-hongyu.wang@intel.com>
For vector move insns like vmovdqa/vmovdqu, their evex counterparts
require an explicit element-size suffix (64/32/16/8). Use of these
suffixed forms is prohibited under AVX10_1 or AVX512F, so we select
vmovaps/vmovups for vector load/store insns that contain an EGPR when
there is no AVX512VL, and keep the original move insn selection
otherwise.
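To make the new rule concrete, here is a minimal standalone sketch of the
selection for the DImode/TImode/OImode case (the helper name pick_dqmov and
the driver are illustrative only and not part of the patch; the real logic
keys off TARGET_APX_EGPR, TARGET_AVX512VL and
x86_extended_rex2reg_mentioned_p, as in the diff below):

/* Illustrative sketch of the egpr_p/egpr_vl handling added to
   ix86_get_ssemov for DImode/TImode/OImode scalars.  The "%v" prefix
   is GCC's output-template marker that expands to "v" on AVX targets.  */
#include <cstdio>

static const char *
pick_dqmov (bool evex_reg_p, bool egpr_p, bool has_avx512vl, bool misaligned_p)
{
  bool egpr_vl = egpr_p && has_avx512vl;

  if (evex_reg_p || egpr_vl)
    /* EVEX form with an explicit element-size suffix.  */
    return misaligned_p ? "vmovdqu64" : "vmovdqa64";
  else if (egpr_p)
    /* EGPR used but no AVX512VL: fall back to vmovups/vmovaps.  */
    return misaligned_p ? "%vmovups" : "%vmovaps";
  else
    return misaligned_p ? "%vmovdqu" : "%vmovdqa";
}

int
main ()
{
  /* EGPR in the address, no AVX512VL, aligned access -> %vmovaps.  */
  std::printf ("%s\n", pick_dqmov (false, true, false, false));
  return 0;
}
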
gcc/ChangeLog:
* config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
adjust mnemonic for vmovdqu/vmovdqa.
* config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suf>_0):
Check if egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
(avx_vec_concat<mode>): Likewise, and mark alternative 0 as
noavx512f with the gpr32 attribute set to 0.
Co-authored-by: Kong Lingling <lingling.kong@intel.com>
Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>
---
gcc/config/i386/i386.cc | 42 +++++++++++++++++++++++++++++++++++------
gcc/config/i386/sse.md | 34 +++++++++++++++++++++++----------
2 files changed, 60 insertions(+), 16 deletions(-)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index ea94663eb68..5d47c2af25e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5478,6 +5478,12 @@ ix86_get_ssemov (rtx *operands, unsigned size,
bool evex_reg_p = (size == 64
|| EXT_REX_SSE_REG_P (operands[0])
|| EXT_REX_SSE_REG_P (operands[1]));
+
+ bool egpr_p = (TARGET_APX_EGPR
+ && (x86_extended_rex2reg_mentioned_p (operands[0])
+ || x86_extended_rex2reg_mentioned_p (operands[1])));
+ bool egpr_vl = egpr_p && TARGET_AVX512VL;
+
machine_mode scalar_mode;
const char *opcode = NULL;
@@ -5550,12 +5556,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
{
case E_HFmode:
case E_BFmode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = (misaligned_p
? (TARGET_AVX512BW
? "vmovdqu16"
: "vmovdqu64")
: "vmovdqa64");
+ else if (egpr_p)
+ opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+ ? "vmovdqu16"
+ : "%vmovups")
+ : "%vmovaps");
else
opcode = (misaligned_p
? (TARGET_AVX512BW
@@ -5570,8 +5582,10 @@ ix86_get_ssemov (rtx *operands, unsigned size,
opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
break;
case E_TFmode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else if (egpr_p)
+ opcode = misaligned_p ? "%vmovups" : "%vmovaps";
else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
break;
@@ -5584,12 +5598,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
switch (scalar_mode)
{
case E_QImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = (misaligned_p
? (TARGET_AVX512BW
? "vmovdqu8"
: "vmovdqu64")
: "vmovdqa64");
+ else if (egpr_p)
+ opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+ ? "vmovdqu8"
+ : "%vmovups")
+ : "%vmovaps");
else
opcode = (misaligned_p
? (TARGET_AVX512BW
@@ -5598,12 +5618,18 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovdqa");
break;
case E_HImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = (misaligned_p
? (TARGET_AVX512BW
? "vmovdqu16"
: "vmovdqu64")
: "vmovdqa64");
+ else if (egpr_p)
+ opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+ ? "vmovdqu16"
+ : "%vmovups")
+ : "%vmovaps");
else
opcode = (misaligned_p
? (TARGET_AVX512BW
@@ -5612,16 +5638,20 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovdqa");
break;
case E_SImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+ else if (egpr_p)
+ opcode = misaligned_p ? "%vmovups" : "%vmovaps";
else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
break;
case E_DImode:
case E_TImode:
case E_OImode:
- if (evex_reg_p)
+ if (evex_reg_p || egpr_vl)
opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else if (egpr_p)
+ opcode = misaligned_p ? "%vmovups" : "%vmovaps";
else
opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 80b43fd7db7..256b0eedbbb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18912,6 +18912,12 @@ (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
{
if (which_alternative == 0)
return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
+ bool egpr_used = (TARGET_APX_EGPR
+ && x86_extended_rex2reg_mentioned_p (operands[2]));
+ const char *align_templ = egpr_used ? "vmovaps\t{%2, %x0|%x0, %2}"
+ : "vmovdqa\t{%2, %x0|%x0, %2}";
+ const char *unalign_templ = egpr_used ? "vmovups\t{%2, %x0|%x0, %2}"
+ : "vmovdqu\t{%2, %x0|%x0, %2}";
switch (<MODE>mode)
{
case E_V8DFmode:
@@ -18927,17 +18933,17 @@ (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
case E_V8DImode:
if (misaligned_operand (operands[2], <ssequartermode>mode))
return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
- : "vmovdqu\t{%2, %x0|%x0, %2}";
+ : unalign_templ;
else
return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
- : "vmovdqa\t{%2, %x0|%x0, %2}";
+ : align_templ;
case E_V16SImode:
if (misaligned_operand (operands[2], <ssequartermode>mode))
return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
- : "vmovdqu\t{%2, %x0|%x0, %2}";
+ : unalign_templ;
else
return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
- : "vmovdqa\t{%2, %x0|%x0, %2}";
+ : align_templ;
default:
gcc_unreachable ();
}
@@ -27661,11 +27667,13 @@ (define_insn "avx_vec_concat<mode>"
[(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
(vec_concat:V_256_512
(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
- (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
+ (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xBt,vm,C,C")))]
"TARGET_AVX
&& (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
|| !MEM_P (operands[1]))"
{
+ bool egpr_used = (TARGET_APX_EGPR
+ && x86_extended_rex2reg_mentioned_p (operands[1]));
switch (which_alternative)
{
case 0:
@@ -27713,7 +27721,8 @@ (define_insn "avx_vec_concat<mode>"
if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
{
if (which_alternative == 2)
- return "vmovdqu\t{%1, %t0|%t0, %1}";
+ return egpr_used ? "vmovups\t{%1, %t0|%t0, %1}"
+ : "vmovdqu\t{%1, %t0|%t0, %1}";
else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
return "vmovdqu64\t{%1, %t0|%t0, %1}";
else
@@ -27722,7 +27731,8 @@ (define_insn "avx_vec_concat<mode>"
else
{
if (which_alternative == 2)
- return "vmovdqa\t{%1, %t0|%t0, %1}";
+ return egpr_used ? "vmovaps\t{%1, %t0|%t0, %1}"
+ : "vmovdqa\t{%1, %t0|%t0, %1}";
else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
return "vmovdqa64\t{%1, %t0|%t0, %1}";
else
@@ -27732,7 +27742,8 @@ (define_insn "avx_vec_concat<mode>"
if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
{
if (which_alternative == 2)
- return "vmovdqu\t{%1, %x0|%x0, %1}";
+ return egpr_used ? "vmovups\t{%1, %x0|%x0, %1}"
+ : "vmovdqu\t{%1, %x0|%x0, %1}";
else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
return "vmovdqu64\t{%1, %x0|%x0, %1}";
else
@@ -27741,7 +27752,8 @@ (define_insn "avx_vec_concat<mode>"
else
{
if (which_alternative == 2)
- return "vmovdqa\t{%1, %x0|%x0, %1}";
+ return egpr_used ? "vmovaps\t{%1, %x0|%x0, %1}"
+ : "vmovdqa\t{%1, %x0|%x0, %1}";
else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
return "vmovdqa64\t{%1, %x0|%x0, %1}";
else
@@ -27754,7 +27766,9 @@ (define_insn "avx_vec_concat<mode>"
gcc_unreachable ();
}
}
- [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+ [(set_attr "isa" "noavx512f,avx512f,*,*")
+ (set_attr "gpr32" "0,1,1,1")
+ (set_attr "type" "sselog,sselog,ssemov,ssemov")
(set_attr "prefix_extra" "1,1,*,*")
(set_attr "length_immediate" "1,1,*,*")
(set_attr "prefix" "maybe_evex")
--
2.31.1
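
For reference, a tiny user-level example of the kind of code that can hit
the new path (purely illustrative, not part of the patch or its testsuite;
the option spelling is an assumption, and whether an EGPR is actually
chosen for the address depends on register allocation):

/* Hypothetical compile: g++ -O2 -mavx2 -mapxf example.cc
   (APX EGPR enabled, no AVX512VL).  If base/idx end up in one of the
   new r16-r31 registers, the aligned 256-bit load is expected to be
   emitted as vmovaps (vmovups when unaligned) instead of the
   unsuffixed vmovdqa/vmovdqu, per this patch.  */
#include <immintrin.h>

__m256i
load_ymm (const __m256i *base, long idx)
{
  return base[idx];
}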