From: Hongtao Liu
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/vendors/ix86/heads/apx)] Handle GPR16 only vector move insns
Date: Mon, 25 Sep 2023 01:57:51 +0000 (GMT)
Message-Id: <20230925015751.407ED3858C66@sourceware.org>
X-Act-Checkin: gcc
X-Git-Author: Hongyu Wang
X-Git-Refname: refs/vendors/ix86/heads/apx
X-Git-Oldrev: 0dde27db6f1075eeee72e4bfb3616265872fcc89
X-Git-Newrev: 00b820a7d2bb37475eb145201bc49f7742391b6e

https://gcc.gnu.org/g:00b820a7d2bb37475eb145201bc49f7742391b6e

commit 00b820a7d2bb37475eb145201bc49f7742391b6e
Author: Hongyu Wang
Date:   Thu Aug 17 08:30:04 2023 +0800

    Handle GPR16 only vector move insns

    For vector move insns like vmovdqa/vmovdqu, their EVEX counterparts
    require an explicit suffix 64/32/16/8.  The use of these instructions
    is prohibited under AVX10_1 or AVX512F, so we select vmovaps/vmovups
    for vector load/store insns that contain an EGPR when there is no
    AVX512VL, and keep the original move insn selection otherwise.

    gcc/ChangeLog:

            * config/i386/i386.cc (ix86_get_ssemov): Check if egpr is used,
            adjust mnemonic for vmovdqu/vmovdqa.
            * config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suffix>_0):
            Check if egpr is used, adjust mnemonic for vmovdqu/vmovdqa.
            (avx_vec_concat<mode>): Likewise, and separate alternative 0 to
            avx_noavx512f.

    Co-authored-by: Kong Lingling
    Co-authored-by: Hongtao Liu
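In outline, the mnemonic selection this patch adds to ix86_get_ssemov
behaves like the minimal C sketch below (illustrative only, not GCC
code: the flag parameters stand in for the evex_reg_p/egpr_p locals and
the TARGET_AVX512VL check, and the real templates use the "%v" output
escape where this sketch spells the mnemonics out):

    /* Sketch of the E_SImode case of the selection; not GCC code.  */
    #include <stdio.h>

    static const char *
    pick_simode_move (int evex_reg_p, int egpr_p, int avx512vl_p,
                      int misaligned_p)
    {
      /* An EGPR is mentioned, but AVX512VL is available: the suffixed
         EVEX forms can be used directly.  */
      int egpr_vl = egpr_p && avx512vl_p;

      if (evex_reg_p || egpr_vl)
        return misaligned_p ? "vmovdqu32" : "vmovdqa32";
      else if (egpr_p)
        /* EGPR without AVX512VL: fall back to the suffix-less
           vmovaps/vmovups.  */
        return misaligned_p ? "vmovups" : "vmovaps";
      else
        return misaligned_p ? "vmovdqu" : "vmovdqa";
    }

    int
    main (void)
    {
      /* Misaligned move mentioning an EGPR, no AVX512VL: vmovups.  */
      printf ("%s\n", pick_simode_move (0, 1, 0, 1));
      return 0;
    }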
"%vmovdqu" : "%vmovdqa"; break; @@ -5584,12 +5598,18 @@ ix86_get_ssemov (rtx *operands, unsigned size, switch (scalar_mode) { case E_QImode: - if (evex_reg_p) + if (evex_reg_p || egpr_vl) opcode = (misaligned_p ? (TARGET_AVX512BW ? "vmovdqu8" : "vmovdqu64") : "vmovdqa64"); + else if (egpr_p) + opcode = (misaligned_p + ? (TARGET_AVX512BW + ? "vmovdqu8" + : "%vmovups") + : "%vmovaps"); else opcode = (misaligned_p ? (TARGET_AVX512BW @@ -5598,12 +5618,18 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovdqa"); break; case E_HImode: - if (evex_reg_p) + if (evex_reg_p || egpr_vl) opcode = (misaligned_p ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64") : "vmovdqa64"); + else if (egpr_p) + opcode = (misaligned_p + ? (TARGET_AVX512BW + ? "vmovdqu16" + : "%vmovups") + : "%vmovaps"); else opcode = (misaligned_p ? (TARGET_AVX512BW @@ -5612,16 +5638,20 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovdqa"); break; case E_SImode: - if (evex_reg_p) + if (evex_reg_p || egpr_vl) opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32"; + else if (egpr_p) + opcode = misaligned_p ? "%vmovups" : "%vmovaps"; else opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa"; break; case E_DImode: case E_TImode: case E_OImode: - if (evex_reg_p) + if (evex_reg_p || egpr_vl) opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64"; + else if (egpr_p) + opcode = misaligned_p ? "%vmovups" : "%vmovaps"; else opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa"; break; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 80b43fd7db7..256b0eedbbb 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -18912,6 +18912,12 @@ { if (which_alternative == 0) return "vinsert\t{$0, %2, %1, %0|%0, %1, %2, 0}"; + bool egpr_used = (TARGET_APX_EGPR + && x86_extended_rex2reg_mentioned_p (operands[2])); + const char *align_templ = egpr_used ? "vmovaps\t{%2, %x0|%x0, %2}" + : "vmovdqa\t{%2, %x0|%x0, %2}"; + const char *unalign_templ = egpr_used ? "vmovups\t{%2, %x0|%x0, %2}" + : "vmovdqu\t{%2, %x0|%x0, %2}"; switch (mode) { case E_V8DFmode: @@ -18927,17 +18933,17 @@ case E_V8DImode: if (misaligned_operand (operands[2], mode)) return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}" - : "vmovdqu\t{%2, %x0|%x0, %2}"; + : unalign_templ; else return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}" - : "vmovdqa\t{%2, %x0|%x0, %2}"; + : align_templ; case E_V16SImode: if (misaligned_operand (operands[2], mode)) return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}" - : "vmovdqu\t{%2, %x0|%x0, %2}"; + : unalign_templ; else return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}" - : "vmovdqa\t{%2, %x0|%x0, %2}"; + : align_templ; default: gcc_unreachable (); } @@ -27661,11 +27667,13 @@ [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv") (vec_concat:V_256_512 (match_operand: 1 "nonimmediate_operand" "x,v,xm,vm") - (match_operand: 2 "nonimm_or_0_operand" "xm,vm,C,C")))] + (match_operand: 2 "nonimm_or_0_operand" "xBt,vm,C,C")))] "TARGET_AVX && (operands[2] == CONST0_RTX (mode) || !MEM_P (operands[1]))" { + bool egpr_used = (TARGET_APX_EGPR + && x86_extended_rex2reg_mentioned_p (operands[1])); switch (which_alternative) { case 0: @@ -27713,7 +27721,8 @@ if (misaligned_operand (operands[1], mode)) { if (which_alternative == 2) - return "vmovdqu\t{%1, %t0|%t0, %1}"; + return egpr_used ? 
"vmovups\t{%1, %t0|%t0, %1}" + : "vmovdqu\t{%1, %t0|%t0, %1}"; else if (GET_MODE_SIZE (mode) == 8) return "vmovdqu64\t{%1, %t0|%t0, %1}"; else @@ -27722,7 +27731,8 @@ else { if (which_alternative == 2) - return "vmovdqa\t{%1, %t0|%t0, %1}"; + return egpr_used ? "vmovaps\t{%1, %t0|%t0, %1}" + : "vmovdqa\t{%1, %t0|%t0, %1}"; else if (GET_MODE_SIZE (mode) == 8) return "vmovdqa64\t{%1, %t0|%t0, %1}"; else @@ -27732,7 +27742,8 @@ if (misaligned_operand (operands[1], mode)) { if (which_alternative == 2) - return "vmovdqu\t{%1, %x0|%x0, %1}"; + return egpr_used ? "vmovups\t{%1, %x0|%x0, %1}" + : "vmovdqu\t{%1, %x0|%x0, %1}"; else if (GET_MODE_SIZE (mode) == 8) return "vmovdqu64\t{%1, %x0|%x0, %1}"; else @@ -27741,7 +27752,8 @@ else { if (which_alternative == 2) - return "vmovdqa\t{%1, %x0|%x0, %1}"; + return egpr_used ? "vmovaps\t{%1, %x0|%x0, %1}" + : "vmovdqa\t{%1, %x0|%x0, %1}"; else if (GET_MODE_SIZE (mode) == 8) return "vmovdqa64\t{%1, %x0|%x0, %1}"; else @@ -27754,7 +27766,9 @@ gcc_unreachable (); } } - [(set_attr "type" "sselog,sselog,ssemov,ssemov") + [(set_attr "isa" "noavx512f,avx512f,*,*") + (set_attr "gpr32" "0,1,1,1") + (set_attr "type" "sselog,sselog,ssemov,ssemov") (set_attr "prefix_extra" "1,1,*,*") (set_attr "length_immediate" "1,1,*,*") (set_attr "prefix" "maybe_evex")