* [PATCH 01/16] [APX NDD] Support Intel APX NDD for legacy add insn
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 02/16] [APX NDD] Restrict TImode register usage when NDD enabled Hongyu Wang
` (14 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
APX NDD provides an extra destination register operand for several gpr
related legacy insns, so a new alternative can be adopted to operand1
with "r" constraint.
This first patch supports NDD for the add instruction, and keeps using lea
when all operands are registers, since lea has a shorter encoding. For
add operations with a memory operand, NDD is used to save an extra move.
The legacy x86 binary-operation expander forces operands[0] and
operands[1] to be the same, so add a helper function that allows NDD-form
patterns in which operands[0] and operands[1] can differ.
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_can_use_ndd_p): New function.
(ix86_fixup_binary_operands): Add new use_ndd flag to check
whether ndd can be used for this binop and adjust operand emit.
(ix86_binary_operator_ok): Likewise.
(ix86_expand_binary_operator): Likewise, and avoid having the postreload
expander generate the lea pattern when use_ndd is explicitly passed.
* config/i386/i386-options.cc (ix86_option_override_internal):
Prohibit apx subfeatures when not in 64bit mode.
* config/i386/i386-protos.h (ix86_binary_operator_ok):
Add use_ndd flag.
(ix86_fixup_binary_operands): Likewise.
(ix86_expand_binary_operator): Likewise.
* config/i386/i386.md (*add<mode>_1): Extend with new alternatives
to support NDD, and adjust output template.
(*addhi_1): Likewise.
(*addqi_1): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: New test.
---
gcc/config/i386/i386-expand.cc | 31 +++++--
gcc/config/i386/i386-options.cc | 3 +
gcc/config/i386/i386-protos.h | 7 +-
gcc/config/i386/i386.md | 109 ++++++++++++++----------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 21 +++++
5 files changed, 118 insertions(+), 53 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd.c
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a8d871d321e..ea0e5881087 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1260,6 +1260,22 @@ ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
return false;
}
+/* APX extends most (but not all) integer instructions with a new form that
+ has a third register operand called a nondestructive destination (NDD). */
+
+bool ix86_can_use_ndd_p (enum rtx_code code)
+{
+ if (!TARGET_APX_NDD)
+ return false;
+ switch (code)
+ {
+ case PLUS:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
destination to use for the operation. If different from the true
@@ -1267,7 +1283,7 @@ ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
rtx
ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
- rtx operands[])
+ rtx operands[], bool use_ndd)
{
rtx dst = operands[0];
rtx src1 = operands[1];
@@ -1307,7 +1323,7 @@ ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
src1 = force_reg (mode, src1);
/* Source 1 cannot be a non-matching memory. */
- if (MEM_P (src1) && !rtx_equal_p (dst, src1))
+ if (!use_ndd && MEM_P (src1) && !rtx_equal_p (dst, src1))
src1 = force_reg (mode, src1);
/* Improve address combine. */
@@ -1338,11 +1354,11 @@ ix86_fixup_binary_operands_no_copy (enum rtx_code code,
void
ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
- rtx operands[])
+ rtx operands[], bool use_ndd)
{
rtx src1, src2, dst, op, clob;
- dst = ix86_fixup_binary_operands (code, mode, operands);
+ dst = ix86_fixup_binary_operands (code, mode, operands, use_ndd);
src1 = operands[1];
src2 = operands[2];
@@ -1352,7 +1368,8 @@ ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
if (reload_completed
&& code == PLUS
- && !rtx_equal_p (dst, src1))
+ && !rtx_equal_p (dst, src1)
+ && !use_ndd)
{
/* This is going to be an LEA; avoid splitting it later. */
emit_insn (op);
@@ -1451,7 +1468,7 @@ ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
bool
ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
- rtx operands[3])
+ rtx operands[3], bool use_ndd)
{
rtx dst = operands[0];
rtx src1 = operands[1];
@@ -1475,7 +1492,7 @@ ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
return false;
/* Source 1 cannot be a non-matching memory. */
- if (MEM_P (src1) && !rtx_equal_p (dst, src1))
+ if (!use_ndd && MEM_P (src1) && !rtx_equal_p (dst, src1))
/* Support "andhi/andsi/anddi" as a zero-extending move. */
return (code == AND
&& (mode == HImode
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index df7d24352d1..7bef8ed8e8a 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2099,6 +2099,9 @@ ix86_option_override_internal (bool main_args_p,
if (TARGET_APX_F && !TARGET_64BIT)
error ("%<-mapxf%> is not supported for 32-bit code");
+ if (opts->x_ix86_apx_features != apx_none && !TARGET_64BIT)
+ error ("%<-mapx-features=%> option is not supported for 32-bit code");
+
if (TARGET_UINTR && !TARGET_64BIT)
error ("%<-muintr%> not supported for 32-bit code");
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 28d0eab11d5..3e08eae4e79 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -108,14 +108,15 @@ extern void ix86_expand_move (machine_mode, rtx[]);
extern void ix86_expand_vector_move (machine_mode, rtx[]);
extern void ix86_expand_vector_move_misalign (machine_mode, rtx[]);
extern rtx ix86_fixup_binary_operands (enum rtx_code,
- machine_mode, rtx[]);
+ machine_mode, rtx[], bool = false);
extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
machine_mode, rtx[]);
extern void ix86_expand_binary_operator (enum rtx_code,
- machine_mode, rtx[]);
+ machine_mode, rtx[], bool = false);
extern void ix86_expand_vector_logical_operator (enum rtx_code,
machine_mode, rtx[]);
-extern bool ix86_binary_operator_ok (enum rtx_code, machine_mode, rtx[3]);
+extern bool ix86_binary_operator_ok (enum rtx_code, machine_mode, rtx[3], bool = false);
+extern bool ix86_can_use_ndd_p (enum rtx_code);
extern bool ix86_avoid_lea_for_add (rtx_insn *, rtx[]);
extern bool ix86_use_lea_for_mov (rtx_insn *, rtx[]);
extern bool ix86_avoid_lea_for_addr (rtx_insn *, rtx[]);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 29289f48e9c..daab634fea0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -559,7 +559,7 @@ (define_attr "unit" "integer,i387,sse,mmx,unknown"
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
- x64_avx,x64_avx512bw,x64_avx512dq,aes,
+ x64_avx,x64_avx512bw,x64_avx512dq,aes,apx_ndd,
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
@@ -957,6 +957,8 @@ (define_attr "enabled" ""
(symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
(eq_attr "isa" "vpclmulqdqvl")
(symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
+ (eq_attr "isa" "apx_ndd")
+ (symbol_ref "TARGET_APX_NDD")
(eq_attr "mmx_isa" "native")
(symbol_ref "!TARGET_MMX_WITH_SSE")
@@ -6229,7 +6231,8 @@ (define_expand "add<mode>3"
(plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
(match_operand:SDWIM 2 "<general_hilo_operand>")))]
""
- "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;")
+ "ix86_expand_binary_operator (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS)); DONE;")
(define_insn_and_split "*add<dwi>3_doubleword"
[(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
@@ -6356,26 +6359,29 @@ (define_insn_and_split "*add<dwi>3_doubleword_concat_zext"
"split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
(define_insn "*add<mode>_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r")
(plus:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
- (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rm,r")
+ (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,re,BM")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
{
+ bool use_ndd = (which_alternative == 4 || which_alternative == 5);
switch (get_attr_type (insn))
{
case TYPE_LEA:
return "#";
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{<imodesuffix>}\t%0";
+ return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "inc{<imodesuffix>}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{<imodesuffix>}\t%0";
+ return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "dec{<imodesuffix>}\t%0";
}
default:
@@ -6384,14 +6390,16 @@ (define_insn "*add<mode>_1"
if (which_alternative == 2)
std::swap (operands[1], operands[2]);
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
- return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
- return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
+ (set (attr "type")
(cond [(eq_attr "alternative" "3")
(const_string "lea")
(match_operand:SWI48 2 "incdec_operand")
@@ -6460,25 +6468,26 @@ (define_insn "addsi_1_zext"
(set_attr "mode" "SI")])
(define_insn "*addhi_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp")
- (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp")
- (match_operand:HI 2 "general_operand" "rn,m,0,ln")))
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
+ (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, HImode, operands)"
+ "ix86_binary_operator_ok (PLUS, HImode, operands,
+ ix86_can_use_ndd_p (PLUS))"
{
+ bool use_ndd = (which_alternative == 4 || which_alternative == 5);
switch (get_attr_type (insn))
{
case TYPE_LEA:
return "#";
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{w}\t%0";
+ return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{w}\t%0";
+ return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
}
default:
@@ -6487,14 +6496,16 @@ (define_insn "*addhi_1"
if (which_alternative == 2)
std::swap (operands[1], operands[2]);
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], HImode))
- return "sub{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "sub{w}\t{%2, %0|%0, %2}";
- return "add{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "add{w}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
+ (set (attr "type")
(cond [(eq_attr "alternative" "3")
(const_string "lea")
(match_operand:HI 2 "incdec_operand")
@@ -6506,30 +6517,38 @@ (define_insn "*addhi_1"
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "HI,HI,HI,SI")])
+ (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
(define_insn "*addqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
- (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
- (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
+ (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, QImode, operands)"
+ "ix86_binary_operator_ok (PLUS, QImode, operands,
+ ix86_can_use_ndd_p (PLUS))"
{
bool widen = (get_attr_mode (insn) != MODE_QI);
-
+ bool use_ndd = (which_alternative == 6 || which_alternative == 7);
switch (get_attr_type (insn))
{
case TYPE_LEA:
return "#";
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ if (use_ndd)
+ return widen ? "inc{l}\t{%1, %k0|%k0, %1}"
+ : "inc{b}\t{%1, %0|%0, %1}";
+ else
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ if (use_ndd)
+ return widen ? "dec{l}\t{%1, %k0|%k0, %1}"
+ : "dec{b}\t{%1, %0|%0, %1}";
+ else
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
}
default:
@@ -6538,21 +6557,25 @@ (define_insn "*addqi_1"
if (which_alternative == 2 || which_alternative == 4)
std::swap (operands[1], operands[2]);
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], QImode))
{
- if (widen)
- return "sub{l}\t{%2, %k0|%k0, %2}";
+ if (use_ndd)
+ return widen ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "sub{b}\t{%2, %1, %0|%0, %1, %2}";
else
- return "sub{b}\t{%2, %0|%0, %2}";
+ return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
+ : "sub{b}\t{%2, %0|%0, %2}";
}
- if (widen)
- return "add{l}\t{%k2, %k0|%k0, %k2}";
+ if (use_ndd)
+ return widen ? "add{l}\t{%k2, %1, %k0|%k0, %1, %k2}"
+ : "add{b}\t{%2, %1, %0|%0, %1, %2}";
else
- return "add{b}\t{%2, %0|%0, %2}";
+ return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
+ : "add{b}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
+ (set (attr "type")
(cond [(eq_attr "alternative" "5")
(const_string "lea")
(match_operand:QI 2 "incdec_operand")
@@ -6564,10 +6587,10 @@ (define_insn "*addqi_1"
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "QI,QI,QI,SI,SI,SI")
+ (set_attr "mode" "QI,QI,QI,SI,SI,SI,SI,SI")
;; Potential partial reg stall on alternatives 3 and 4.
(set (attr "preferred_for_speed")
- (cond [(eq_attr "alternative" "3,4")
+ (cond [(eq_attr "alternative" "3,4,6,7")
(symbol_ref "!TARGET_PARTIAL_REG_STALL")]
(symbol_ref "true")))])
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
new file mode 100644
index 00000000000..dd3dc78e52f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapxf -O2" } */
+/* { dg-final { scan-assembler-not "movl"} } */
+
+int foo (int *a)
+{
+ int b = *a - 1;
+ return b;
+}
+
+int foo2 (int a, int b)
+{
+ int c = a + b;
+ return c;
+}
+
+int foo3 (int *a, int b)
+{
+ int c = *a + b;
+ return c;
+}
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 02/16] [APX NDD] Restrict TImode register usage when NDD enabled
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
2023-11-15 9:46 ` [PATCH 01/16] [APX NDD] Support Intel APX NDD for legacy add insn Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 03/16] [APX NDD] Support APX NDD for optimization patterns of add Hongyu Wang
` (13 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu
Under APX NDD, the previous TImode allocation is problematic because it
originally used consecutive register pairs, like rax:rdi, rdi:rdx.
This causes issues for all TImode NDD patterns. With NDD we no longer
assume that arithmetic operations like add have a dependency between dest
and src1, so a write to the 1st highpart rdi will be overridden by the 2nd
lowpart rdi if the 2nd lowpart rdi has a different src as input; the write
to the 1st highpart rdi is then lost, causing a miscompilation.
To resolve this, under TARGET_APX_NDD we only allow registers with an even
regno to be allocated for TImode, so that TImode values are allocated to
non-overlapping register pairs.
Inline assembly may now get an error if it forces an __int128 to be allocated
to an odd-numbered general register.
gcc/ChangeLog:
* config/i386/i386.cc (ix86_hard_regno_mode_ok): Restrict even regno
for TImode if APX NDD enabled.
---
gcc/config/i386/i386.cc | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 683ac643bc8..3779d5b1206 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20824,6 +20824,16 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
return true;
return !can_create_pseudo_p ();
}
+ /* Without NDD, TImode patterns assume that src1 and dest use the same
+ register, so the highpart/lowpart of two TImode values can be allocated
+ to consecutive, overlapping register pairs like rax:rdx, rdx:rcx.
+ This does not work for APX_NDD, since NDD allows different registers
+ as dest/src1: a write to the 2nd lowpart then clobbers the write to
+ the 1st highpart, and the insn will be optimized out. So for TImode
+ patterns that support the NDD form, the allowed register number must
+ be even to avoid such mixed high/low part overrides. */
+ else if (TARGET_APX_NDD && mode == TImode)
+ return regno % 2 == 0;
/* We handle both integer and floats in the general purpose registers. */
else if (VALID_INT_MODE_P (mode)
|| VALID_FP_MODE_P (mode))
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 03/16] [APX NDD] Support APX NDD for optimization patterns of add
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
2023-11-15 9:46 ` [PATCH 01/16] [APX NDD] Support Intel APX NDD for legacy add insn Hongyu Wang
2023-11-15 9:46 ` [PATCH 02/16] [APX NDD] Restrict TImode register usage when NDD enabled Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 04/16] [APX NDD] Disable seg_prefixed memory usage for NDD add Hongyu Wang
` (12 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
gcc/ChangeLog:
* config/i386/i386.md (addsi_1_zext): Add new alternatives for NDD and
adjust output templates.
(*add<mode>_2): Likewise.
(*addsi_2_zext): Likewise.
(*add<mode>_3): Likewise.
(*addsi_3_zext): Likewise.
(*adddi_4): Likewise.
(*add<mode>_4): Likewise.
(*add<mode>_5): Likewise.
(*addv<mode>4): Likewise.
(addv<mode>4_1): Likewise.
(*add<mode>3_cconly_overflow_1): Likewise.
(*add<mode>3_cc_overflow_1): Likewise.
(*addsi3_zext_cc_overflow_1): Likewise.
(*add<mode>3_cconly_overflow_2): Likewise.
(*add<mode>3_cc_overflow_2): Likewise.
(*addsi3_zext_cc_overflow_2): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add more tests.
---
gcc/config/i386/i386.md | 314 +++++++++++++++---------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 53 ++--
2 files changed, 236 insertions(+), 131 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index daab634fea0..7ddb2cb2a71 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6420,12 +6420,13 @@ (define_insn "*add<mode>_1"
;; patterns constructed from addsi_1 to match.
(define_insn "addsi_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(zero_extend:DI
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le"))))
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le,rBMe"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands,
+ ix86_can_use_ndd_p (PLUS))"
{
switch (get_attr_type (insn))
{
@@ -6434,11 +6435,13 @@ (define_insn "addsi_1_zext"
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return "inc{l}\t%k0";
+ return which_alternative == 3 ? "inc{l}\t{%1, %k0|%k0, %1}"
+ : "inc{l}\t%k0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%k0";
+ return which_alternative == 3 ? "dec{l}\t{%1, %k0|%k0, %1}"
+ : "dec{l}\t%k0";
}
default:
@@ -6448,12 +6451,15 @@ (define_insn "addsi_1_zext"
std::swap (operands[1], operands[2]);
if (x86_maybe_negate_const_int (&operands[2], SImode))
- return "sub{l}\t{%2, %k0|%k0, %2}";
+ return which_alternative == 3 ? "sub{l}\t{%2 ,%1, %k0|%k0, %1, %2}"
+ : "sub{l}\t{%2, %k0|%k0, %2}";
- return "add{l}\t{%2, %k0|%k0, %2}";
+ return which_alternative == 3 ? "add{l}\t{%2 ,%1, %k0|%k0, %1, %2}"
+ : "add{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,*,apx_ndd")
+ (set (attr "type")
(cond [(eq_attr "alternative" "2")
(const_string "lea")
(match_operand:SI 2 "incdec_operand")
@@ -6697,37 +6703,43 @@ (define_insn "*add<mode>_2"
[(set (reg FLAGS_REG)
(compare
(plus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>,rm,r")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0,r<i>,<m>"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>,r,r")
(plus:SWI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ && ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
{
+ bool use_ndd = (which_alternative == 3 || which_alternative == 4);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return "inc{<imodesuffix>}\t%0";
+ return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "inc{<imodesuffix>}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{<imodesuffix>}\t%0";
+ return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "dec{<imodesuffix>}\t%0";
}
default:
if (which_alternative == 2)
std::swap (operands[1], operands[2]);
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
- return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
- return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
+ (set (attr "type")
(if_then_else (match_operand:SWI 2 "incdec_operand")
(const_string "incdec")
(const_string "alu")))
@@ -6742,23 +6754,27 @@ (define_insn "*add<mode>_2"
(define_insn "*addsi_2_zext"
[(set (reg FLAGS_REG)
(compare
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r,r")
+ (set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ && ix86_binary_operator_ok (PLUS, SImode, operands,
+ ix86_can_use_ndd_p (PLUS))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return "inc{l}\t%k0";
+ return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}"
+ : "inc{l}\t%k0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%k0";
+ return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}"
+ : "dec{l}\t%k0";
}
default:
@@ -6766,12 +6782,15 @@ (define_insn "*addsi_2_zext"
std::swap (operands[1], operands[2]);
if (x86_maybe_negate_const_int (&operands[2], SImode))
- return "sub{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "sub{l}\t{%2, %k0|%k0, %2}";
- return "add{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "add{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,apx_ndd")
+ (set (attr "type")
(if_then_else (match_operand:SI 2 "incdec_operand")
(const_string "incdec")
(const_string "alu")))
@@ -6785,35 +6804,40 @@ (define_insn "*addsi_2_zext"
(define_insn "*add<mode>_3"
[(set (reg FLAGS_REG)
(compare
- (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0"))
- (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")))
- (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
+ (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r")))
+ (clobber (match_scratch:SWI 0 "=<r>,<r>,r"))]
"ix86_match_ccmode (insn, CCZmode)
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return "inc{<imodesuffix>}\t%0";
+ return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "inc{<imodesuffix>}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{<imodesuffix>}\t%0";
+ return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "dec{<imodesuffix>}\t%0";
}
default:
if (which_alternative == 1)
std::swap (operands[1], operands[2]);
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
- return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
- return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,apx_ndd")
+ (set (attr "type")
(if_then_else (match_operand:SWI 2 "incdec_operand")
(const_string "incdec")
(const_string "alu")))
@@ -6828,22 +6852,24 @@ (define_insn "*add<mode>_3"
(define_insn "*addsi_3_zext"
[(set (reg FLAGS_REG)
(compare
- (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
- (match_operand:SI 1 "nonimmediate_operand" "%0,r")))
- (set (match_operand:DI 0 "register_operand" "=r,r")
+ (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")))
+ (set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
- && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ && ix86_binary_operator_ok (PLUS, SImode, operands,
+ ix86_can_use_ndd_p (PLUS))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return "inc{l}\t%k0";
+ return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" : "inc{l}\t%k0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%k0";
+ return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" : "dec{l}\t%k0";
}
default:
@@ -6851,12 +6877,15 @@ (define_insn "*addsi_3_zext"
std::swap (operands[1], operands[2]);
if (x86_maybe_negate_const_int (&operands[2], SImode))
- return "sub{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "sub{l}\t{%2, %k0|%k0, %2}";
- return "add{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "add{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,apx_ndd")
+ (set (attr "type")
(if_then_else (match_operand:SI 2 "incdec_operand")
(const_string "incdec")
(const_string "alu")))
@@ -6877,31 +6906,35 @@ (define_insn "*addsi_3_zext"
(define_insn "*adddi_4"
[(set (reg FLAGS_REG)
(compare
- (match_operand:DI 1 "nonimmediate_operand" "0")
- (match_operand:DI 2 "x86_64_immediate_operand" "e")))
- (clobber (match_scratch:DI 0 "=r"))]
+ (match_operand:DI 1 "nonimmediate_operand" "0,r")
+ (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
+ (clobber (match_scratch:DI 0 "=r,r"))]
"TARGET_64BIT
&& ix86_match_ccmode (insn, CCGCmode)"
{
+ bool use_ndd = (which_alternative == 1);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == constm1_rtx)
- return "inc{q}\t%0";
+ return use_ndd ? "inc{q}\t{%1, %0|%0, %1}" : "inc{q}\t%0";
else
{
gcc_assert (operands[2] == const1_rtx);
- return "dec{q}\t%0";
+ return use_ndd ? "dec{q}\t{%1, %0|%0, %1}" : "dec{q}\t%0";
}
default:
if (x86_maybe_negate_const_int (&operands[2], DImode))
- return "add{q}\t{%2, %0|%0, %2}";
+ return use_ndd ? "add{q}\t{%2, %1, %0|%0, %1, %2}"
+ : "add{q}\t{%2, %0|%0, %2}";
- return "sub{q}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sub{q}\t{%2, %1, %0|%0, %1, %2}"
+ : "sub{q}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,apx_ndd")
+ (set (attr "type")
(if_then_else (match_operand:DI 2 "incdec_operand")
(const_string "incdec")
(const_string "alu")))
@@ -6922,30 +6955,36 @@ (define_insn "*adddi_4"
(define_insn "*add<mode>_4"
[(set (reg FLAGS_REG)
(compare
- (match_operand:SWI124 1 "nonimmediate_operand" "0")
+ (match_operand:SWI124 1 "nonimmediate_operand" "0,r")
(match_operand:SWI124 2 "const_int_operand")))
- (clobber (match_scratch:SWI124 0 "=<r>"))]
+ (clobber (match_scratch:SWI124 0 "=<r>,r"))]
"ix86_match_ccmode (insn, CCGCmode)"
{
+ bool use_ndd = (which_alternative == 1);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == constm1_rtx)
- return "inc{<imodesuffix>}\t%0";
+ return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "inc{<imodesuffix>}\t%0";
else
{
gcc_assert (operands[2] == const1_rtx);
- return "dec{<imodesuffix>}\t%0";
+ return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "dec{<imodesuffix>}\t%0";
}
default:
if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
- return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
- return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,apx_ndd")
+ (set (attr "type")
(if_then_else (match_operand:<MODE> 2 "incdec_operand")
(const_string "incdec")
(const_string "alu")))
@@ -6960,36 +6999,41 @@ (define_insn "*add<mode>_5"
[(set (reg FLAGS_REG)
(compare
(plus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")
- (match_operand:SWI 2 "<general_operand>" "<g>,0"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r")
+ (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>"))
(const_int 0)))
- (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
+ (clobber (match_scratch:SWI 0 "=<r>,<r>,r"))]
"ix86_match_ccmode (insn, CCGOCmode)
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return "inc{<imodesuffix>}\t%0";
+ return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "inc{<imodesuffix>}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{<imodesuffix>}\t%0";
+ return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "dec{<imodesuffix>}\t%0";
}
default:
if (which_alternative == 1)
std::swap (operands[1], operands[2]);
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
- return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
- return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,apx_ndd")
+ (set (attr "type")
(if_then_else (match_operand:SWI 2 "incdec_operand")
(const_string "incdec")
(const_string "alu")))
@@ -7169,35 +7213,46 @@ (define_insn "*addv<mode>4"
[(set (reg:CCO FLAGS_REG)
(eq:CCO (plus:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
(sign_extend:<DWI>
- (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
+ (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
(sign_extend:<DWI>
(plus:SWI (match_dup 1) (match_dup 2)))))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(plus:SWI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+ {
+ if (which_alternative == 2 || which_alternative == 3)
+ return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "addv<mode>4_1"
[(set (reg:CCO FLAGS_REG)
(eq:CCO (plus:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "0"))
+ (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
(match_operand:<DWI> 3 "const_int_operand"))
(sign_extend:<DWI>
(plus:SWI
(match_dup 1)
- (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
(plus:SWI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))
&& CONST_INT_P (operands[2])
&& INTVAL (operands[2]) == INTVAL (operands[3])"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ add{<imodesuffix>}\t{%2, %0|%0, %2}
+ add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")
(set (attr "length_immediate")
(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
@@ -8944,27 +8999,37 @@ (define_insn "*add<mode>3_cconly_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "%0")
- (match_operand:SWI 2 "<general_operand>" "<g>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,r")
+ (match_operand:SWI 2 "<general_operand>" "<g>,<g>"))
(match_dup 1)))
- (clobber (match_scratch:SWI 0 "=<r>"))]
+ (clobber (match_scratch:SWI 0 "=<r>,r"))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ add{<imodesuffix>}\t{%2, %0|%0, %2}
+ add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "@add<mode>3_cc_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
(match_dup 1)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(plus:SWI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+{
+ if (which_alternative == 2 || which_alternative == 3)
+ return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_peephole2
@@ -9009,55 +9074,72 @@ (define_insn "*addsi3_zext_cc_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SI
- (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe"))
(match_dup 1)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
- "add{l}\t{%2, %k0|%k0, %2}"
- [(set_attr "type" "alu")
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+ "@
+ add{l}\t{%2, %k0|%k0, %2}
+ add{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "SI")])
(define_insn "*add<mode>3_cconly_overflow_2"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "%0")
- (match_operand:SWI 2 "<general_operand>" "<g>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,r")
+ (match_operand:SWI 2 "<general_operand>" "<g>,<g>"))
(match_dup 2)))
- (clobber (match_scratch:SWI 0 "=<r>"))]
+ (clobber (match_scratch:SWI 0 "=<r>,r"))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ add{<imodesuffix>}\t{%2, %0|%0, %2}
+ add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "*add<mode>3_cc_overflow_2"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
(match_dup 2)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(plus:SWI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+ "@
+ add{<imodesuffix>}\t{%2, %0|%0, %2}
+ add{<imodesuffix>}\t{%2, %0|%0, %2}
+ add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "*addsi3_zext_cc_overflow_2"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SI
- (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe"))
(match_dup 2)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
- "add{l}\t{%2, %k0|%k0, %2}"
- [(set_attr "type" "alu")
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+ "@
+ add{l}\t{%2, %k0|%k0, %2}
+ add{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "SI")])
(define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index dd3dc78e52f..6c136174d24 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -2,20 +2,43 @@
/* { dg-options "-mapxf -O2" } */
/* { dg-final { scan-assembler-not "movl"} } */
-int foo (int *a)
-{
- int b = *a - 1;
- return b;
-}
+#define FOO(TYPE, OP_NAME, OP) \
+TYPE \
+__attribute__ ((noipa)) \
+foo_##OP_NAME##_##TYPE (TYPE *a) \
+{ \
+ TYPE b = *a OP 1; \
+ return b; \
+}
-int foo2 (int a, int b)
-{
- int c = a + b;
- return c;
-}
+#define FOO1(TYPE, OP_NAME, OP) \
+TYPE \
+__attribute__ ((noipa)) \
+foo1_##OP_NAME##_##TYPE (TYPE a, TYPE b) \
+{ \
+ TYPE c = a OP b; \
+ return c; \
+}
+
+#define FOO2(TYPE, OP_NAME, OP) \
+TYPE \
+__attribute__ ((noipa)) \
+foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \
+{ \
+ TYPE c = *a OP b; \
+ return c; \
+}
+
+FOO (char, add, +)
+FOO1 (char, add, +)
+FOO2 (char, add, +)
+FOO (short, add, +)
+FOO1 (short, add, +)
+FOO2 (short, add, +)
+FOO (int, add, +)
+FOO1 (int, add, +)
+FOO2 (int, add, +)
+FOO (long, add, +)
+FOO1 (long, add, +)
+FOO2 (long, add, +)
-int foo3 (int *a, int b)
-{
- int c = *a + b;
- return c;
-}
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 04/16] [APX NDD] Disable seg_prefixed memory usage for NDD add
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (2 preceding siblings ...)
2023-11-15 9:46 ` [PATCH 03/16] [APX NDD] Support APX NDD for optimization patterns of add Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 05/16] [APX NDD] Support APX NDD for adc insns Hongyu Wang
` (11 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu
NDD uses the EVEX prefix, so when a segment prefix is also applied, the
instruction could exceed its 15-byte limit, especially when adding immediates.
This could happen when the "e" constraint accepts any UNSPEC_TPOFF/UNSPEC_NTPOFF
constant: the offset is added to the segment register, which is encoded using a
segment prefix. Disable the use of those *POFF constants in NDD add alternatives with a new constraint.
gcc/ChangeLog:
* config/i386/constraints.md (je): New constraint.
* config/i386/i386-protos.h (x86_no_poff_operand_p): New prototype.
* config/i386/i386.cc (x86_no_poff_operand_p): New function to
check any *POFF constant.
* config/i386/i386.md (*add<mode>_1): Split out je alternative for add.
---
gcc/config/i386/constraints.md | 5 +++++
gcc/config/i386/i386-protos.h | 1 +
gcc/config/i386/i386.cc | 25 +++++++++++++++++++++++++
gcc/config/i386/i386.md | 10 +++++-----
4 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index cbee31fa40a..c6b51324294 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -433,3 +433,8 @@ (define_address_constraint "jb"
(define_register_constraint "jc"
"TARGET_APX_EGPR && !TARGET_AVX ? GENERAL_GPR16 : GENERAL_REGS")
+
+(define_constraint "je"
+ "@internal constant that do not allow any unspec global offsets"
+ (and (match_operand 0 "x86_64_immediate_operand")
+ (match_test "x86_no_poff_operand_p (op)")))
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 3e08eae4e79..5d902e2925b 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -66,6 +66,7 @@ extern bool x86_extended_QIreg_mentioned_p (rtx_insn *);
extern bool x86_extended_reg_mentioned_p (rtx);
extern bool x86_extended_rex2reg_mentioned_p (rtx);
extern bool x86_evex_reg_mentioned_p (rtx [], int);
+extern bool x86_no_poff_operand_p (rtx);
extern bool x86_maybe_negate_const_int (rtx *, machine_mode);
extern machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3779d5b1206..47159b06f7d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23292,6 +23292,31 @@ x86_evex_reg_mentioned_p (rtx operands[], int nops)
return false;
}
+/* Return true when OPERAND does not contain any UNSPEC_*POFF related constant.
+ Such constants could make APX_NDD instructions exceed the encoding length limit. */
+bool
+x86_no_poff_operand_p (rtx operand)
+{
+ if (GET_CODE (operand) == CONST)
+ {
+ rtx op = XEXP (operand, 0);
+ if (GET_CODE (op) == PLUS)
+ op = XEXP (op, 0);
+
+ if (GET_CODE (op) == UNSPEC)
+ {
+ int unspec = XINT (op, 1);
+ return (unspec != UNSPEC_NTPOFF
+ && unspec != UNSPEC_TPOFF
+ && unspec != UNSPEC_DTPOFF
+ && unspec != UNSPEC_GOTTPOFF
+ && unspec != UNSPEC_GOTNTPOFF
+ && unspec != UNSPEC_INDNTPOFF);
+ }
+ }
+ return true;
+}
+
/* If profitable, negate (without causing overflow) integer constant
of mode MODE at location LOC. Return true in this case. */
bool
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7ddb2cb2a71..ecd06625a7d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6359,15 +6359,15 @@ (define_insn_and_split "*add<dwi>3_doubleword_concat_zext"
"split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
(define_insn "*add<mode>_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r,r")
(plus:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rm,r")
- (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,re,BM")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rm,r,m,r")
+ (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,je,BM")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
ix86_can_use_ndd_p (PLUS))"
{
- bool use_ndd = (which_alternative == 4 || which_alternative == 5);
+ bool use_ndd = (which_alternative >= 4);
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -6398,7 +6398,7 @@ (define_insn "*add<mode>_1"
: "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
+ [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "3")
(const_string "lea")
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 05/16] [APX NDD] Support APX NDD for adc insns
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (3 preceding siblings ...)
2023-11-15 9:46 ` [PATCH 04/16] [APX NDD] Disable seg_prefixed memory usage for NDD add Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 06/16] [APX NDD] Support APX NDD for sub insns Hongyu Wang
` (10 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
Legacy adc patterns are commonly used for TImode add. When extending TImode
add to the NDD version, operands[0] and operands[1] can be different, so an extra
move should be emitted in those patterns that optimize the addition of const0_rtx.
gcc/ChangeLog:
* config/i386/i386.md (*add<dwi>3_doubleword): Add ndd constraints, and
move operands[1] to operands[0] when they are not equal.
(*add<dwi>3_doubleword_cc_overflow_1): Likewise.
(*add<dwi>3_doubleword_zext): Add ndd constraints.
(*addv<dwi>4_doubleword): Likewise.
(*addv<dwi>4_doubleword_1): Likewise.
(addv<mode>4_overflow_1): Likewise.
(*addv<mode>4_overflow_2): Likewise.
(@add<mode>3_carry): Likewise.
(*add<mode>3_carry_0): Likewise.
(*addsi3_carry_zext): Likewise.
(*addsi3_carry_zext_0): Likewise.
(addcarry<mode>): Likewise.
(addcarry<mode>_0): Likewise.
(*addcarry<mode>_1): Likewise.
(*add<mode>3_eq): Likewise.
(*add<mode>3_ne): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd-adc.c: New test.
---
gcc/config/i386/i386.md | 203 +++++++++++++-------
gcc/testsuite/gcc.target/i386/apx-ndd-adc.c | 15 ++
2 files changed, 146 insertions(+), 72 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-adc.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ecd06625a7d..f23859d1172 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6235,12 +6235,13 @@ (define_expand "add<mode>3"
ix86_can_use_ndd_p (PLUS)); DONE;")
(define_insn_and_split "*add<dwi>3_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(plus:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,r")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (PLUS, <DWI>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
"#"
"&& reload_completed"
[(parallel [(set (reg:CCC FLAGS_REG)
@@ -6260,24 +6261,35 @@ (define_insn_and_split "*add<dwi>3_doubleword"
split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
if (operands[2] == const0_rtx)
{
+ /* Under NDD op0 and op1 may not equal, do not delete insn then. */
+
+ bool emit_insn_deleted_note_p = true;
+ if (!rtx_equal_p (operands[0], operands[1]))
+ {
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn_deleted_note_p = false;
+ }
if (operands[5] != const0_rtx)
- ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3]);
+ ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3],
+ ix86_can_use_ndd_p (PLUS));
else if (!rtx_equal_p (operands[3], operands[4]))
emit_move_insn (operands[3], operands[4]);
- else
+ else if (emit_insn_deleted_note_p)
emit_note (NOTE_INSN_DELETED);
DONE;
}
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn_and_split "*add<dwi>3_doubleword_zext"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,r,r")
(plus:<DWI>
(zero_extend:<DWI>
- (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))
- (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")))
+ (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,m")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
"#"
"&& reload_completed"
[(parallel [(set (reg:CCC FLAGS_REG)
@@ -6293,7 +6305,8 @@ (define_insn_and_split "*add<dwi>3_doubleword_zext"
(match_dup 4))
(const_int 0)))
(clobber (reg:CC FLAGS_REG))])]
- "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn_and_split "*add<dwi>3_doubleword_concat"
[(set (match_operand:<DWI> 0 "register_operand" "=&r")
@@ -7269,14 +7282,15 @@ (define_insn_and_split "*addv<dwi>4_doubleword"
(eq:CCO
(plus:<QPWI>
(sign_extend:<QPWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r"))
(sign_extend:<QPWI>
- (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
+ (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
(sign_extend:<QPWI>
(plus:<DWI> (match_dup 1) (match_dup 2)))))
- (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(plus:<DWI> (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (PLUS, <DWI>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
"#"
"&& reload_completed"
[(parallel [(set (reg:CCC FLAGS_REG)
@@ -7306,22 +7320,24 @@ (define_insn_and_split "*addv<dwi>4_doubleword"
(match_dup 5)))])]
{
split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn_and_split "*addv<dwi>4_doubleword_1"
[(set (reg:CCO FLAGS_REG)
(eq:CCO
(plus:<QPWI>
(sign_extend:<QPWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0"))
- (match_operand:<QPWI> 3 "const_scalar_int_operand" "n"))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,rm"))
+ (match_operand:<QPWI> 3 "const_scalar_int_operand" "n,n"))
(sign_extend:<QPWI>
(plus:<DWI>
(match_dup 1)
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
- (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
+ (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
(plus:<DWI> (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)
+ "ix86_binary_operator_ok (PLUS, <DWI>mode, operands,
+ ix86_can_use_ndd_p (PLUS))
&& CONST_SCALAR_INT_P (operands[2])
&& rtx_equal_p (operands[2], operands[3])"
"#"
@@ -7359,7 +7375,8 @@ (define_insn_and_split "*addv<dwi>4_doubleword_1"
operands[5]));
DONE;
}
-})
+}
+[(set_attr "isa" "*,apx_ndd")])
(define_insn "*addv<mode>4_overflow_1"
[(set (reg:CCO FLAGS_REG)
@@ -7369,9 +7386,9 @@ (define_insn "*addv<mode>4_overflow_1"
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0")))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")))
(sign_extend:<DWI>
- (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
+ (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
(sign_extend:<DWI>
(plus:SWI
(plus:SWI
@@ -7379,15 +7396,22 @@ (define_insn "*addv<mode>4_overflow_1"
[(match_dup 3) (const_int 0)])
(match_dup 1))
(match_dup 2)))))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
(plus:SWI
(plus:SWI
(match_op_dup 5 [(match_dup 3) (const_int 0)])
(match_dup 1))
(match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+{
+ if (which_alternative == 2 || which_alternative == 3)
+ return "adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "adc{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "*addv<mode>4_overflow_2"
@@ -7398,26 +7422,30 @@ (define_insn "*addv<mode>4_overflow_2"
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "%0")))
- (match_operand:<DWI> 6 "const_int_operand" "n"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,rm")))
+ (match_operand:<DWI> 6 "const_int_operand" "n,n"))
(sign_extend:<DWI>
(plus:SWI
(plus:SWI
(match_operator:SWI 5 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)])
(match_dup 1))
- (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
+ (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
(plus:SWI
(plus:SWI
(match_op_dup 5 [(match_dup 3) (const_int 0)])
(match_dup 1))
(match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))
&& CONST_INT_P (operands[2])
&& INTVAL (operands[2]) == INTVAL (operands[6])"
- "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ adc{<imodesuffix>}\t{%2, %0|%0, %2}
+ adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")
(set (attr "length_immediate")
(if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
@@ -8143,17 +8171,24 @@ (define_insn "*subsi_3_zext"
;; Add with carry and subtract with borrow
(define_insn "@add<mode>3_carry"
- [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(plus:SWI
(plus:SWI
(match_operator:SWI 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+{
+ if (which_alternative == 2 || which_alternative == 3)
+ return "adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "adc{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
@@ -8240,31 +8275,38 @@ (define_insn "*add<mode>3_carry_0r"
(set_attr "mode" "<MODE>")])
(define_insn "*addsi3_carry_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(plus:SI
(plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
[(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 1 "register_operand" "%0"))
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+ (match_operand:SI 1 "register_operand" "%0,r"))
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
- "adc{l}\t{%2, %k0|%k0, %2}"
- [(set_attr "type" "alu")
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+ "@
+ adc{l}\t{%2, %k0|%k0, %2}
+ adc{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
(define_insn "*addsi3_carry_zext_0"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
[(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 1 "register_operand" "0"))))
+ (match_operand:SI 1 "register_operand" "0,r"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
- "adc{l}\t{$0, %k0|%k0, 0}"
- [(set_attr "type" "alu")
+ "@
+ adc{l}\t{$0, %k0|%k0, 0}
+ adc{l}\t{$0, %1, %k0|%k0, %1, 0}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
@@ -8293,20 +8335,26 @@ (define_insn "addcarry<mode>"
(plus:SWI48
(match_operator:SWI48 5 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
- (match_operand:SWI48 1 "nonimmediate_operand" "%0,0"))
- (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,rm,r"))
+ (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,r,m")))
(plus:<DWI>
(zero_extend:<DWI> (match_dup 2))
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)]))))
- (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+ (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
(plus:SWI48 (plus:SWI48 (match_op_dup 5
[(match_dup 3) (const_int 0)])
(match_dup 1))
(match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
+ "@
+ adc{<imodesuffix>}\t{%2, %0|%0, %2}
+ adc{<imodesuffix>}\t{%2, %0|%0, %2}
+ adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
@@ -8464,7 +8512,8 @@ (define_expand "addcarry<mode>_0"
(match_dup 1)))
(set (match_operand:SWI48 0 "nonimmediate_operand")
(plus:SWI48 (match_dup 1) (match_dup 2)))])]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)")
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))")
(define_insn "*addcarry<mode>_1"
[(set (reg:CCC FLAGS_REG)
@@ -8474,18 +8523,19 @@ (define_insn "*addcarry<mode>_1"
(plus:SWI48
(match_operator:SWI48 5 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
- (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
- (match_operand:SWI48 2 "x86_64_immediate_operand" "e")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,rm"))
+ (match_operand:SWI48 2 "x86_64_immediate_operand" "e,e")))
(plus:<DWI>
(match_operand:<DWI> 6 "const_scalar_int_operand")
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)]))))
- (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+ (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
(plus:SWI48 (plus:SWI48 (match_op_dup 5
[(match_dup 3) (const_int 0)])
(match_dup 1))
(match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))
&& CONST_INT_P (operands[2])
/* Check that operands[6] is operands[2] zero extended from
<MODE>mode to <DWI>mode. */
@@ -8498,8 +8548,11 @@ (define_insn "*addcarry<mode>_1"
&& ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0)
== UINTVAL (operands[2]))
&& CONST_WIDE_INT_ELT (operands[6], 1) == 0))"
- "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ adc{<imodesuffix>}\t{%2, %0|%0, %2}
+ adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")
@@ -9146,12 +9199,13 @@ (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o"))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o"))
(match_dup 1)))
- (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(plus:<DWI> (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (PLUS, <DWI>mode, operands,
+ ix86_can_use_ndd_p (PLUS))"
"#"
"&& reload_completed"
[(parallel [(set (reg:CCC FLAGS_REG)
@@ -9180,6 +9234,8 @@ (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
if (operands[2] == const0_rtx)
{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5]));
DONE;
}
@@ -9188,7 +9244,8 @@ (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
operands[5], <MODE>mode);
else
operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]);
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
;; x == 0 with zero flag test can be done also as x < 1U with carry flag
;; test, where the latter is preferrable if we have some carry consuming
@@ -9203,7 +9260,8 @@ (define_insn_and_split "*add<mode>3_eq"
(match_operand:SWI 1 "nonimmediate_operand"))
(match_operand:SWI 2 "<general_operand>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -9227,7 +9285,8 @@ (define_insn_and_split "*add<mode>3_ne"
"CONST_INT_P (operands[2])
&& (<MODE>mode != DImode
|| INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
- && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+ && ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (PLUS))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-adc.c b/gcc/testsuite/gcc.target/i386/apx-ndd-adc.c
new file mode 100644
index 00000000000..9d5991457da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd-adc.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { int128 && { ! ia32 } } } } */
+/* { dg-options "-mapxf -O2" } */
+
+#include "pr91681-1.c"
+// *addti3_doubleword
+// *addti3_doubleword_zext
+// *adddi3_cc_overflow_1
+// *adddi3_carry
+
+int foo3 (int *a, int b)
+{
+ int c = *a + b + (a > b); /* { dg-warning "comparison between pointer and integer" } */
+ return c;
+}
+/* { dg-final { scan-assembler-not "xor" } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 06/16] [APX NDD] Support APX NDD for sub insns
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (4 preceding siblings ...)
2023-11-15 9:46 ` [PATCH 05/16] [APX NDD] Support APX NDD for adc insns Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 07/16] [APX NDD] Support APX NDD for sbb insn Hongyu Wang
` (9 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_fixup_binary_operands_no_copy):
Add use_ndd parameter.
(ix86_can_use_ndd_p): Add MINUS.
* config/i386/i386-protos.h (ix86_fixup_binary_operands_no_copy):
Add use_ndd parameter to the declaration.
* config/i386/i386.md (sub<mode>3): Add NDD constraints.
(*sub<mode>_1): Likewise.
(*subsi_1_zext): Likewise.
(*sub<mode>_2): Likewise.
(*subsi_2_zext): Likewise.
(subv<mode>4): Likewise.
(*subv<mode>4): Likewise.
(subv<mode>4_1): Likewise.
(usubv<mode>4): Likewise.
(*sub<mode>_3): Likewise.
(*subsi_3_zext): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add test for ndd sub.
[APX NDD] Support APX NDD for more optimized sub insn
gcc/ChangeLog:
* config/i386/i386.md
---
gcc/config/i386/i386-expand.cc | 6 +-
gcc/config/i386/i386-protos.h | 2 +-
gcc/config/i386/i386.md | 152 ++++++++++++++++--------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 13 ++
4 files changed, 118 insertions(+), 55 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index ea0e5881087..e5f75875e3b 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1270,6 +1270,7 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
switch (code)
{
case PLUS:
+ case MINUS:
return true;
default:
return false;
@@ -1342,9 +1343,10 @@ ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
- machine_mode mode, rtx operands[])
+ machine_mode mode, rtx operands[],
+ bool use_ndd)
{
- rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+ rtx dst = ix86_fixup_binary_operands (code, mode, operands, use_ndd);
gcc_assert (dst == operands[0]);
}
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 5d902e2925b..ad895fac72d 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -111,7 +111,7 @@ extern void ix86_expand_vector_move_misalign (machine_mode, rtx[]);
extern rtx ix86_fixup_binary_operands (enum rtx_code,
machine_mode, rtx[], bool = false);
extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
- machine_mode, rtx[]);
+ machine_mode, rtx[], bool = false);
extern void ix86_expand_binary_operator (enum rtx_code,
machine_mode, rtx[], bool = false);
extern void ix86_expand_vector_logical_operator (enum rtx_code,
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f23859d1172..1aa8469d666 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7637,7 +7637,8 @@ (define_expand "sub<mode>3"
(minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
(match_operand:SDWIM 2 "<general_hilo_operand>")))]
""
- "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;")
+ "ix86_expand_binary_operator (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS)); DONE;")
(define_insn_and_split "*sub<dwi>3_doubleword"
[(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
@@ -7663,7 +7664,10 @@ (define_insn_and_split "*sub<dwi>3_doubleword"
split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
if (operands[2] == const0_rtx)
{
- ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3]);
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3],
+ ix86_can_use_ndd_p (MINUS));
DONE;
}
})
@@ -7692,25 +7696,35 @@ (define_insn_and_split "*sub<dwi>3_doubleword_zext"
"split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
(define_insn "*sub<mode>_1"
- [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>i,r,r")
(minus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "0,0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
- "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "*subsi_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
- (minus:SI (match_operand:SI 1 "register_operand" "0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+ (minus:SI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sub{l}\t{%2, %k0|%k0, %2}"
- [(set_attr "type" "alu")
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sub{l}\t{%2, %k0|%k0, %2}
+ sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "SI")])
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
@@ -7738,31 +7752,41 @@ (define_insn "*sub<mode>_2"
[(set (reg FLAGS_REG)
(compare
(minus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "0,0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(minus:SWI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
- "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "*subsi_2_zext"
[(set (reg FLAGS_REG)
(compare
- (minus:SI (match_operand:SI 1 "register_operand" "0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+ (minus:SI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(minus:SI (match_dup 1)
(match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sub{l}\t{%2, %k0|%k0, %2}"
- [(set_attr "type" "alu")
+ && ix86_binary_operator_ok (MINUS, SImode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sub{l}\t{%2, %k0|%k0, %2}
+ sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "SI")])
(define_insn "*subqi_ext<mode>_0"
@@ -7841,7 +7865,8 @@ (define_expand "subv<mode>4"
(pc)))]
""
{
- ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
+ ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS));
if (CONST_SCALAR_INT_P (operands[2]))
operands[4] = operands[2];
else
@@ -7852,35 +7877,45 @@ (define_insn "*subv<mode>4"
[(set (reg:CCO FLAGS_REG)
(eq:CCO (minus:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
(sign_extend:<DWI>
- (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
+ (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
(sign_extend:<DWI>
(minus:SWI (match_dup 1) (match_dup 2)))))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(minus:SWI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
- "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "subv<mode>4_1"
[(set (reg:CCO FLAGS_REG)
(eq:CCO (minus:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "0"))
+ (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
(match_operand:<DWI> 3 "const_int_operand"))
(sign_extend:<DWI>
(minus:SWI
(match_dup 1)
- (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
(minus:SWI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))
&& CONST_INT_P (operands[2])
&& INTVAL (operands[2]) == INTVAL (operands[3])"
- "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")
(set (attr "length_immediate")
(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
@@ -7976,6 +8011,8 @@ (define_insn_and_split "*subv<dwi>4_doubleword_1"
split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
if (operands[2] == const0_rtx)
{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
operands[5]));
DONE;
@@ -8057,18 +8094,25 @@ (define_expand "usubv<mode>4"
(label_ref (match_operand 3))
(pc)))]
""
- "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
+ "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS));")
(define_insn "*sub<mode>_3"
[(set (reg FLAGS_REG)
- (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>i,r,r")
(minus:SWI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCmode)
- && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
- "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %0|%0, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_peephole2
@@ -8156,16 +8200,20 @@ (define_insn_and_split "*dec_cmov<mode>"
(define_insn "*subsi_3_zext"
[(set (reg FLAGS_REG)
- (compare (match_operand:SI 1 "register_operand" "0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe")))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (compare (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe")))
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(minus:SI (match_dup 1)
(match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
- && ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sub{l}\t{%2, %1|%1, %2}"
- [(set_attr "type" "alu")
+ && ix86_binary_operator_ok (MINUS, SImode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sub{l}\t{%2, %1|%1, %2}
+ sub{l}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "SI")])
\f
;; Add with carry and subtract with borrow
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 6c136174d24..9d444e0d830 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -42,3 +42,16 @@ FOO (long, add, +)
FOO1 (long, add, +)
FOO2 (long, add, +)
+FOO (char, sub, -)
+FOO1 (char, sub, -)
+FOO (short, sub, -)
+FOO1 (short, sub, -)
+FOO (int, sub, -)
+FOO1 (int, sub, -)
+FOO (long, sub, -)
+FOO1 (long, sub, -)
+/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "sub(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "sub(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 07/16] [APX NDD] Support APX NDD for sbb insn
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (5 preceding siblings ...)
2023-11-15 9:46 ` [PATCH 06/16] [APX NDD] Support APX NDD for sub insns Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 08/16] [APX NDD] Support APX NDD for neg insn Hongyu Wang
` (8 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
Similar to *add<dwi>3_doubleword, operands[1] may not be equal to operands[0],
so an extra move is required.
gcc/ChangeLog:
* config/i386/i386.md (*sub<dwi>3_doubleword): Add NDD constraints, and
emit a move when operands[0] is not equal to operands[1].
(*sub<dwi>3_doubleword_zext): Likewise.
(*subv<dwi>4_doubleword): Likewise.
(*subv<dwi>4_doubleword_1): Likewise.
(*subv<mode>4_overflow_1): Likewise.
(*subv<mode>4_overflow_2): Likewise.
(*addsi3_carry_zext_0r): Likewise.
(@sub<mode>3_carry): Add NDD alternatives and adjust output templates.
(*subsi3_carry_zext): Likewise.
(subborrow<mode>): Likewise.
(subborrow<mode>_0): Likewise.
(*sub<mode>3_eq): Likewise.
(*sub<mode>3_ne): Likewise.
(*sub<mode>3_eq_1): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd-sbb.c: New test.
---
gcc/config/i386/i386.md | 159 ++++++++++++--------
gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c | 6 +
2 files changed, 106 insertions(+), 59 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1aa8469d666..c3dcfaf52e1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7641,12 +7641,13 @@ (define_expand "sub<mode>3"
ix86_can_use_ndd_p (MINUS)); DONE;")
(define_insn_and_split "*sub<dwi>3_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(minus:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
"#"
"&& reload_completed"
[(parallel [(set (reg:CC FLAGS_REG)
@@ -7670,16 +7671,18 @@ (define_insn_and_split "*sub<dwi>3_doubleword"
ix86_can_use_ndd_p (MINUS));
DONE;
}
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn_and_split "*sub<dwi>3_doubleword_zext"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,r,r")
(minus:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
+ (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,o")
(zero_extend:<DWI>
- (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))))
+ (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
"#"
"&& reload_completed"
[(parallel [(set (reg:CC FLAGS_REG)
@@ -7693,7 +7696,8 @@ (define_insn_and_split "*sub<dwi>3_doubleword_zext"
(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
(const_int 0)))
(clobber (reg:CC FLAGS_REG))])]
- "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn "*sub<mode>_1"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>i,r,r")
@@ -7929,14 +7933,15 @@ (define_insn_and_split "*subv<dwi>4_doubleword"
(eq:CCO
(minus:<QPWI>
(sign_extend:<QPWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "0,0"))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r"))
(sign_extend:<QPWI>
- (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
+ (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
(sign_extend:<QPWI>
(minus:<DWI> (match_dup 1) (match_dup 2)))))
- (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(minus:<DWI> (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
"#"
"&& reload_completed"
[(parallel [(set (reg:CC FLAGS_REG)
@@ -7964,22 +7969,24 @@ (define_insn_and_split "*subv<dwi>4_doubleword"
(match_dup 5)))])]
{
split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn_and_split "*subv<dwi>4_doubleword_1"
[(set (reg:CCO FLAGS_REG)
(eq:CCO
(minus:<QPWI>
(sign_extend:<QPWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "0"))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro"))
(match_operand:<QPWI> 3 "const_scalar_int_operand"))
(sign_extend:<QPWI>
(minus:<DWI>
(match_dup 1)
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
- (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
+ (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
(minus:<DWI> (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))
&& CONST_SCALAR_INT_P (operands[2])
&& rtx_equal_p (operands[2], operands[3])"
"#"
@@ -8017,7 +8024,8 @@ (define_insn_and_split "*subv<dwi>4_doubleword_1"
operands[5]));
DONE;
}
-})
+}
+[(set_attr "isa" "*,apx_ndd")])
(define_insn "*subv<mode>4_overflow_1"
[(set (reg:CCO FLAGS_REG)
@@ -8025,11 +8033,11 @@ (define_insn "*subv<mode>4_overflow_1"
(minus:<DWI>
(minus:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)]))
(sign_extend:<DWI>
- (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
+ (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
(sign_extend:<DWI>
(minus:SWI
(minus:SWI
@@ -8037,15 +8045,21 @@ (define_insn "*subv<mode>4_overflow_1"
(match_operator:SWI 5 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)]))
(match_dup 2)))))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
(minus:SWI
(minus:SWI
(match_dup 1)
(match_op_dup 5 [(match_dup 3) (const_int 0)]))
(match_dup 2)))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
- "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+ sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+ sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
(define_insn "*subv<mode>4_overflow_2"
@@ -8054,28 +8068,32 @@ (define_insn "*subv<mode>4_overflow_2"
(minus:<DWI>
(minus:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "%0"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,rm"))
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)]))
- (match_operand:<DWI> 6 "const_int_operand" "n"))
+ (match_operand:<DWI> 6 "const_int_operand" "n,n"))
(sign_extend:<DWI>
(minus:SWI
(minus:SWI
(match_dup 1)
(match_operator:SWI 5 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)]))
- (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
+ (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
(minus:SWI
(minus:SWI
(match_dup 1)
(match_op_dup 5 [(match_dup 3) (const_int 0)]))
(match_dup 2)))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))
&& CONST_INT_P (operands[2])
&& INTVAL (operands[2]) == INTVAL (operands[6])"
- "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+ sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "mode" "<MODE>")
(set (attr "length_immediate")
(if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
@@ -8360,15 +8378,18 @@ (define_insn "*addsi3_carry_zext_0"
(set_attr "mode" "SI")])
(define_insn "*addsi3_carry_zext_0r"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(plus:SI (match_operator:SI 2 "ix86_carry_flag_unset_operator"
[(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 1 "register_operand" "0"))))
+ (match_operand:SI 1 "register_operand" "0,r"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
- "sbb{l}\t{$-1, %k0|%k0, -1}"
- [(set_attr "type" "alu")
+ "@
+ sbb{l}\t{$-1, %k0|%k0, -1}
+ sbb{l}\t{$-1, %1, %k0|%k0, %1, -1}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
@@ -8610,17 +8631,23 @@ (define_insn "*addcarry<mode>_1"
(const_string "4")))])
(define_insn "@sub<mode>3_carry"
- [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(minus:SWI
(minus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
(match_operator:SWI 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)]))
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
- "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+ sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+ sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
@@ -8707,18 +8734,22 @@ (define_insn "*sub<mode>3_carry_0r"
(set_attr "mode" "<MODE>")])
(define_insn "*subsi3_carry_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(minus:SI
(minus:SI
- (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 1 "register_operand" "0,r")
(match_operator:SI 3 "ix86_carry_flag_operator"
[(reg FLAGS_REG) (const_int 0)]))
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sbb{l}\t{%2, %k0|%k0, %2}"
- [(set_attr "type" "alu")
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sbb{l}\t{%2, %k0|%k0, %2}
+ sbb{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
@@ -8803,21 +8834,27 @@ (define_insn "subborrow<mode>"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI>
- (match_operand:SWI48 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SWI48 1 "nonimmediate_operand" "0,0,r,rm"))
(plus:<DWI>
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(zero_extend:<DWI>
- (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))))
- (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+ (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,rm,r")))))
+ (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
(minus:SWI48 (minus:SWI48
(match_dup 1)
(match_operator:SWI48 5 "ix86_carry_flag_operator"
[(match_dup 3) (const_int 0)]))
(match_dup 2)))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
- "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))"
+ "@
+ sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+ sbb{<imodesuffix>}\t{%2, %0|%0, %2}
+ sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
@@ -8978,7 +9015,8 @@ (define_expand "subborrow<mode>_0"
(match_operand:SWI48 2 "<general_operand>")))
(set (match_operand:SWI48 0 "register_operand")
(minus:SWI48 (match_dup 1) (match_dup 2)))])]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)")
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))")
(define_expand "uaddc<mode>5"
[(match_operand:SWI48 0 "register_operand")
@@ -9404,7 +9442,8 @@ (define_insn_and_split "*sub<mode>3_eq"
(const_int 0)))
(match_operand:SWI 2 "<general_operand>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -9429,7 +9468,8 @@ (define_insn_and_split "*sub<mode>3_ne"
"CONST_INT_P (operands[2])
&& (<MODE>mode != DImode
|| INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
- && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+ && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -9458,7 +9498,8 @@ (define_insn_and_split "*sub<mode>3_eq_1"
"CONST_INT_P (operands[2])
&& (<MODE>mode != DImode
|| INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
- && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+ && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
+ ix86_can_use_ndd_p (MINUS))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c b/gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c
new file mode 100644
index 00000000000..662e3c607d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { int128 && { ! ia32 } } } } */
+/* { dg-options "-mapxf -O2" } */
+
+#include "pr91681-2.c"
+
+/* { dg-final { scan-assembler-times "sbbq\[^\n\r]*0, %rdi, %rdx" 1 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 08/16] [APX NDD] Support APX NDD for neg insn
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (6 preceding siblings ...)
2023-11-15 9:46 ` [PATCH 07/16] [APX NDD] Support APX NDD for sbb insn Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 09/16] [APX NDD] Support APX NDD for not insn Hongyu Wang
` (7 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add NEG
support.
(ix86_expand_unary_operator): Add use_ndd parameter and adjust for NDD.
* config/i386/i386-protos.h (ix86_expand_unary_operator): Add use_ndd
parameter.
(ix86_unary_operator_ok): Likewise.
* config/i386/i386.cc (ix86_unary_operator_ok): Add use_ndd parameter,
and allow a memory source that differs from the destination when it is set.
* config/i386/i386.md (neg<mode>2): Add ndd constraints.
(*neg<mode>_1): Likewise.
(*neg<dwi>2_doubleword): Likewise.
(*negsi_1_zext): Likewise.
(*neg<mode>_2): Likewise.
(*negsi_2_zext): Likewise.
(*neg<mode>_ccc_1): Likewise.
(*neg<mode>_ccc_2): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add neg test.
---
gcc/config/i386/i386-expand.cc | 5 +-
gcc/config/i386/i386-protos.h | 5 +-
gcc/config/i386/i386.cc | 5 +-
gcc/config/i386/i386.md | 79 ++++++++++++++++---------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 29 +++++++++
5 files changed, 90 insertions(+), 33 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index e5f75875e3b..995cc792c5f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1271,6 +1271,7 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
{
case PLUS:
case MINUS:
+ case NEG:
return true;
default:
return false;
@@ -1511,7 +1512,7 @@ ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
void
ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
- rtx operands[])
+ rtx operands[], bool use_ndd)
{
bool matching_memory = false;
rtx src, dst, op, clob;
@@ -1530,7 +1531,7 @@ ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
}
/* When source operand is memory, destination must match. */
- if (MEM_P (src) && !matching_memory)
+ if (!use_ndd && MEM_P (src) && !matching_memory)
src = force_reg (mode, src);
/* Emit the instruction. */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index ad895fac72d..0010fd71011 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -128,7 +128,7 @@ extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high);
extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
extern bool ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn);
extern void ix86_expand_unary_operator (enum rtx_code, machine_mode,
- rtx[]);
+ rtx[], bool = false);
extern rtx ix86_build_const_vector (machine_mode, bool, rtx);
extern rtx ix86_build_signbit_mask (machine_mode, bool, bool);
extern HOST_WIDE_INT ix86_convert_const_vector_to_integer (rtx,
@@ -148,7 +148,8 @@ extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode,
rtx[]);
extern void ix86_expand_copysign (rtx []);
extern void ix86_expand_xorsign (rtx []);
-extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[2]);
+extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[2],
+ bool = false);
extern bool ix86_match_ccmode (rtx, machine_mode);
extern bool ix86_match_ptest_ccmode (rtx);
extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 47159b06f7d..9b0715943f7 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -16160,11 +16160,12 @@ ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
bool
ix86_unary_operator_ok (enum rtx_code,
machine_mode,
- rtx operands[2])
+ rtx operands[2],
+ bool use_ndd)
{
/* If one of operands is memory, source and destination must match. */
if ((MEM_P (operands[0])
- || MEM_P (operands[1]))
+ || (!use_ndd && MEM_P (operands[1])))
&& ! rtx_equal_p (operands[0], operands[1]))
return false;
return true;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c3dcfaf52e1..8ba524e9e44 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12952,13 +12952,15 @@ (define_expand "neg<mode>2"
[(set (match_operand:SDWIM 0 "nonimmediate_operand")
(neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
""
- "ix86_expand_unary_operator (NEG, <MODE>mode, operands); DONE;")
+ "ix86_expand_unary_operator (NEG, <MODE>mode, operands,
+ ix86_can_use_ndd_p (NEG)); DONE;")
(define_insn_and_split "*neg<dwi>2_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
- (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_unary_operator_ok (NEG, <DWI>mode, operands)"
+ "ix86_unary_operator_ok (NEG, <DWI>mode, operands,
+ ix86_can_use_ndd_p (NEG))"
"#"
"&& reload_completed"
[(parallel
@@ -12975,7 +12977,8 @@ (define_insn_and_split "*neg<dwi>2_doubleword"
[(set (match_dup 2)
(neg:DWIH (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
- "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
+ [(set_attr "isa" "*,apx_ndd")])
;; Convert:
;; mov %esi, %edx
@@ -13064,22 +13067,30 @@ (define_peephole2
(clobber (reg:CC FLAGS_REG))])])
(define_insn "*neg<mode>_1"
- [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
- (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
+ (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
- "neg{<imodesuffix>}\t%0"
+ "ix86_unary_operator_ok (NEG, <MODE>mode, operands,
+ ix86_can_use_ndd_p (NEG))"
+ "@
+ neg{<imodesuffix>}\t%0
+ neg{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "negnot")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "<MODE>")])
(define_insn "*negsi_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
- (neg:SI (match_operand:SI 1 "register_operand" "0"))))
+ (neg:SI (match_operand:SI 1 "register_operand" "0,r"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
- "neg{l}\t%k0"
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands,
+ ix86_can_use_ndd_p (NEG))"
+ "@
+ neg{l}\t%k0
+ neg{l}\t{%k1, %k0|%k0, %k1}"
[(set_attr "type" "negnot")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
@@ -13105,51 +13116,65 @@ (define_insn_and_split "*neg<mode>_1_slp"
(define_insn "*neg<mode>_2"
[(set (reg FLAGS_REG)
(compare
- (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
+ (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
(neg:SWI (match_dup 1)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
- "neg{<imodesuffix>}\t%0"
+ && ix86_unary_operator_ok (NEG, <MODE>mode, operands,
+ ix86_can_use_ndd_p (NEG))"
+ "@
+ neg{<imodesuffix>}\t%0
+ neg{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "negnot")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "<MODE>")])
(define_insn "*negsi_2_zext"
[(set (reg FLAGS_REG)
(compare
- (neg:SI (match_operand:SI 1 "register_operand" "0"))
+ (neg:SI (match_operand:SI 1 "register_operand" "0,r"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(neg:SI (match_dup 1))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_unary_operator_ok (NEG, SImode, operands)"
- "neg{l}\t%k0"
+ && ix86_unary_operator_ok (NEG, SImode, operands,
+ ix86_can_use_ndd_p (NEG))"
+ "@
+ neg{l}\t%k0
+ neg{l}\t{%k1, %k0|%k0, %k1}"
[(set_attr "type" "negnot")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*neg<mode>_ccc_1"
[(set (reg:CCC FLAGS_REG)
(unspec:CCC
- [(match_operand:SWI 1 "nonimmediate_operand" "0")
+ [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
(const_int 0)] UNSPEC_CC_NE))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
(neg:SWI (match_dup 1)))]
""
- "neg{<imodesuffix>}\t%0"
+ "@
+ neg{<imodesuffix>}\t%0
+ neg{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "negnot")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "<MODE>")])
(define_insn "*neg<mode>_ccc_2"
[(set (reg:CCC FLAGS_REG)
(unspec:CCC
- [(match_operand:SWI 1 "nonimmediate_operand" "0")
+ [(match_operand:SWI 1 "nonimmediate_operand" "0,r")
(const_int 0)] UNSPEC_CC_NE))
- (clobber (match_scratch:SWI 0 "=<r>"))]
+ (clobber (match_scratch:SWI 0 "=<r>,r"))]
""
- "neg{<imodesuffix>}\t%0"
+ "@
+ neg{<imodesuffix>}\t%0
+ neg{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "negnot")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "<MODE>")])
(define_expand "x86_neg<mode>_ccc"
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 9d444e0d830..18b423258ea 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -27,8 +27,25 @@ foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \
{ \
TYPE c = *a OP b; \
return c; \
+}
+
+#define F(TYPE, OP_NAME, OP) \
+TYPE \
+__attribute__ ((noipa)) \
+f_##OP_NAME##_##TYPE (TYPE *a) \
+{ \
+ TYPE b = OP*a; \
+ return b; \
}
+#define F1(TYPE, OP_NAME, OP) \
+TYPE \
+__attribute__ ((noipa)) \
+f1_##OP_NAME##_##TYPE (TYPE a) \
+{ \
+ TYPE b = OP a; \
+ return b; \
+}
FOO (char, add, +)
FOO1 (char, add, +)
FOO2 (char, add, +)
@@ -50,8 +67,20 @@ FOO (int, sub, -)
FOO1 (int, sub, -)
FOO (long, sub, -)
FOO1 (long, sub, -)
+
+F (char, neg, -)
+F1 (char, neg, -)
+F (short, neg, -)
+F1 (short, neg, -)
+F (int, neg, -)
+F1 (int, neg, -)
+F (long, neg, -)
+F1 (long, neg, -)
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "sub(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "sub(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "negb\[^\n\r]\\(%rdi\\), %(?:|r|e)al" 1 } } */
+/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 09/16] [APX NDD] Support APX NDD for not insn
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (7 preceding siblings ...)
2023-11-15 9:46 ` [PATCH 08/16] [APX NDD] Support APX NDD for neg insn Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:46 ` [PATCH 10/16] [APX NDD] Support APX NDD for and insn Hongyu Wang
` (6 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add NOT
support.
* config/i386/i386.md (one_cmpl<mode>2): Add NDD constraints, adjust
output template.
(*one_cmpl<mode>2_1): Likewise.
(*one_cmplqi2_1): Likewise.
(*one_cmpl<dwi>2_doubleword): Likewise.
(*one_cmplsi2_1_zext): Likewise.
(*one_cmpl<mode>2_2): Likewise.
(*one_cmplsi2_2_zext): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add not test.
---
gcc/config/i386/i386-expand.cc | 1 +
gcc/config/i386/i386.md | 73 +++++++++++++++----------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 11 ++++
3 files changed, 55 insertions(+), 30 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 995cc792c5f..be77ba4a476 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1272,6 +1272,7 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
case PLUS:
case MINUS:
case NEG:
+ case NOT:
return true;
default:
return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8ba524e9e44..9758e4e5144 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -13673,64 +13673,73 @@ (define_expand "one_cmpl<mode>2"
[(set (match_operand:SDWIM 0 "nonimmediate_operand")
(not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
""
- "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
+ "ix86_expand_unary_operator (NOT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (NOT)); DONE;")
(define_insn_and_split "*one_cmpl<dwi>2_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
- (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))]
- "ix86_unary_operator_ok (NOT, <DWI>mode, operands)"
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))]
+ "ix86_unary_operator_ok (NOT, <DWI>mode, operands,
+ ix86_can_use_ndd_p (NOT))"
"#"
"&& reload_completed"
[(set (match_dup 0)
(not:DWIH (match_dup 1)))
(set (match_dup 2)
(not:DWIH (match_dup 3)))]
- "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
+ [(set_attr "isa" "*,apx_ndd")])
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,?k")
- (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))]
- "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+ (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,rm,k")))]
+ "ix86_unary_operator_ok (NOT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (NOT))"
"@
not{<imodesuffix>}\t%0
+ not{<imodesuffix>}\t{%1, %0|%0, %1}
#"
- [(set_attr "isa" "*,<kmov_isa>")
- (set_attr "type" "negnot,msklog")
+ [(set_attr "isa" "*,apx_ndd,<kmov_isa>")
+ (set_attr "type" "negnot,negnot,msklog")
(set_attr "mode" "<MODE>")])
(define_insn "*one_cmplsi2_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,?k")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,?k")
(zero_extend:DI
- (not:SI (match_operand:SI 1 "register_operand" "0,k"))))]
- "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
+ (not:SI (match_operand:SI 1 "register_operand" "0,r,k"))))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands,
+ ix86_can_use_ndd_p (NOT))"
"@
not{l}\t%k0
+ not{l}\t{%k1, %k0|%k0, %k1}
#"
- [(set_attr "isa" "x64,avx512bw_512")
- (set_attr "type" "negnot,msklog")
- (set_attr "mode" "SI,SI")])
+ [(set_attr "isa" "x64,apx_ndd,avx512bw_512")
+ (set_attr "type" "negnot,negnot,msklog")
+ (set_attr "mode" "SI,SI,SI")])
(define_insn "*one_cmplqi2_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,?k")
- (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
- "ix86_unary_operator_ok (NOT, QImode, operands)"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r,?k")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,rm,k")))]
+ "ix86_unary_operator_ok (NOT, QImode, operands,
+ ix86_can_use_ndd_p (NOT))"
"@
not{b}\t%0
not{l}\t%k0
+ not{l}\t{%k1, %k0|%k0, %k1}
#"
- [(set_attr "isa" "*,*,avx512f")
- (set_attr "type" "negnot,negnot,msklog")
+ [(set_attr "isa" "*,*,apx_ndd,avx512f")
+ (set_attr "type" "negnot,negnot,negnot,msklog")
(set (attr "mode")
- (cond [(eq_attr "alternative" "1")
+ (cond [(eq_attr "alternative" "1,2")
(const_string "SI")
- (and (eq_attr "alternative" "2")
+ (and (eq_attr "alternative" "3")
(match_test "!TARGET_AVX512DQ"))
(const_string "HI")
]
(const_string "QI")))
;; Potential partial reg stall on alternative 1.
(set (attr "preferred_for_speed")
- (cond [(eq_attr "alternative" "1")
+ (cond [(eq_attr "alternative" "1,2")
(symbol_ref "!TARGET_PARTIAL_REG_STALL")]
(symbol_ref "true")))])
@@ -13753,14 +13762,16 @@ (define_insn_and_split "*one_cmpl<mode>_1_slp"
(define_insn "*one_cmpl<mode>2_2"
[(set (reg FLAGS_REG)
- (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
+ (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
(not:SWI (match_dup 1)))]
"ix86_match_ccmode (insn, CCNOmode)
- && ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
+ && ix86_unary_operator_ok (NOT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (NOT))"
"#"
[(set_attr "type" "alu1")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "<MODE>")])
(define_split
@@ -13779,14 +13790,16 @@ (define_split
(define_insn "*one_cmplsi2_2_zext"
[(set (reg FLAGS_REG)
- (compare (not:SI (match_operand:SI 1 "register_operand" "0"))
+ (compare (not:SI (match_operand:SI 1 "register_operand" "0,r"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (not:SI (match_dup 1))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
- && ix86_unary_operator_ok (NOT, SImode, operands)"
+ && ix86_unary_operator_ok (NOT, SImode, operands,
+ ix86_can_use_ndd_p (NOT))"
"#"
[(set_attr "type" "alu1")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
(define_split
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 18b423258ea..9af72d1a46d 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -76,6 +76,15 @@ F (int, neg, -)
F1 (int, neg, -)
F (long, neg, -)
F1 (long, neg, -)
+
+F (char, not, ~)
+F1 (char, not, ~)
+F (short, not, ~)
+F1 (short, not, ~)
+F (int, not, ~)
+F1 (int, not, ~)
+F (long, not, ~)
+F1 (long, not, ~)
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -84,3 +93,5 @@ F1 (long, neg, -)
/* { dg-final { scan-assembler-times "negb\[^\n\r]\\(%rdi\\), %(?:|r|e)al" 1 } } */
/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 3 } } */
/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 10/16] [APX NDD] Support APX NDD for and insn
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (8 preceding siblings ...)
2023-11-15 9:46 ` [PATCH 09/16] [APX NDD] Support APX NDD for not insn Hongyu Wang
@ 2023-11-15 9:46 ` Hongyu Wang
2023-11-15 9:47 ` [PATCH 11/16] [APX NDD] Support APX NDD for or/xor insn Hongyu Wang
` (5 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:46 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
For the NDD form of the AND insn, three splitter fixes are needed after
extending the legacy patterns.
1. APX NDD does not support high QImode registers like ah, bh, ch, dh, so the
optimization splitters that generate a highpart zero_extract for QImode
need to be prohibited for NDD patterns.
2. The legacy AND insn uses the r/qm/L constraints, and a post-reload splitter
transforms it into a zero_extend move. For the NDD form of AND the splitter is
not strict enough: it assumes such an AND has a const_int operand matching
constraint "L", whereas the NDD form of AND allows a const_int with any QI
value. Restrict the splitter condition to match the "L" constraint, which
strictly matches zero-extend semantics.
3. The legacy AND insn adopts the r/0/Z constraints, and a splitter tries to
optimize this form into a strict_lowpart QImode AND when the 7th bit is not
set. But the splitter wrongly converts the non-zext form of an NDD AND with a
memory source: the strict_lowpart transform then matches alternative 1 of
*<code><mode>_slp_1 and generates *movstrict<mode>_1, so the zext semantics
are lost. This can leave the highpart of the destination uncleared and
generate wrong code. Disable the splitter when NDD is adopted and operands[0]
and operands[1] are not equal.
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add AND
support.
* config/i386/i386.md (and<mode>3): Add NDD alternatives and adjust
output template.
(*anddi_1): Likewise.
(*and<mode>_1): Likewise.
(*andqi_1): Likewise.
(*andsi_1_zext): Likewise.
(*anddi_2): Likewise.
(*andsi_2_zext): Likewise.
(*andqi_2_maybe_si): Likewise.
(*and<mode>_2): Likewise.
(*and<dwi>3_doubleword): Add NDD constraints, emit move for optimized
case if operands[0] not equal to operands[1].
(define_split for QI highpart AND): Prohibit splitter to split NDD
form AND insn to <any_logic:code>qi_ext<mode>_3.
(define_split for QI strict_lowpart optimization): Prohibit splitter to
split NDD form AND insn to *<code><mode>3_1_slp.
(define_split for zero_extend and optimization): Prohibit splitter to
split NDD form AND insn to zero_extend insn.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add and test.
* gcc.target/i386/apx-spill_to_egprs-1.c: Change some check.
---
gcc/config/i386/i386-expand.cc | 1 +
gcc/config/i386/i386.md | 177 ++++++++++++------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 13 ++
.../gcc.target/i386/apx-spill_to_egprs-1.c | 8 +-
4 files changed, 135 insertions(+), 64 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index be77ba4a476..662f687abc3 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1273,6 +1273,7 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
case MINUS:
case NEG:
case NOT:
+ case AND:
return true;
default:
return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9758e4e5144..4bf0c16f401 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11471,18 +11471,20 @@ (define_expand "and<mode>3"
(operands[0], gen_lowpart (mode, operands[1]),
<MODE>mode, mode, 1));
else
- ix86_expand_binary_operator (AND, <MODE>mode, operands);
+ ix86_expand_binary_operator (AND, <MODE>mode, operands,
+ ix86_can_use_ndd_p (AND));
DONE;
})
(define_insn_and_split "*and<dwi>3_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(and:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (AND, <DWI>mode, operands,
+ ix86_can_use_ndd_p (AND))"
"#"
"&& reload_completed"
[(const_int:DWIH 0)]
@@ -11494,39 +11496,53 @@ (define_insn_and_split "*and<dwi>3_doubleword"
if (operands[2] == const0_rtx)
emit_move_insn (operands[0], const0_rtx);
else if (operands[2] == constm1_rtx)
- emit_insn_deleted_note_p = true;
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_insn_deleted_note_p = true;
+ }
else
- ix86_expand_binary_operator (AND, <MODE>mode, &operands[0]);
+ ix86_expand_binary_operator (AND, <MODE>mode, &operands[0],
+ ix86_can_use_ndd_p (AND));
if (operands[5] == const0_rtx)
emit_move_insn (operands[3], const0_rtx);
else if (operands[5] == constm1_rtx)
{
- if (emit_insn_deleted_note_p)
+ if (!rtx_equal_p (operands[3], operands[4]))
+ emit_move_insn (operands[3], operands[4]);
+ else if (emit_insn_deleted_note_p)
emit_note (NOTE_INSN_DELETED);
}
else
- ix86_expand_binary_operator (AND, <MODE>mode, &operands[3]);
+ ix86_expand_binary_operator (AND, <MODE>mode, &operands[3],
+ ix86_can_use_ndd_p (AND));
DONE;
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn "*anddi_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,?k")
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
+ (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,r,qm,k")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,re,m,L,k")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands,
+ ix86_can_use_ndd_p (AND))"
"@
and{l}\t{%k2, %k0|%k0, %k2}
+ and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
and{q}\t{%2, %0|%0, %2}
and{q}\t{%2, %0|%0, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
#
#"
- [(set_attr "isa" "x64,x64,x64,x64,avx512bw_512")
- (set_attr "type" "alu,alu,alu,imovx,msklog")
- (set_attr "length_immediate" "*,*,*,0,*")
+ [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,x64,avx512bw_512")
+ (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,*,*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -11534,7 +11550,7 @@ (define_insn "*anddi_1"
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI,DI,DI,SI,DI")])
+ (set_attr "mode" "SI,SI,DI,DI,DI,DI,SI,DI")])
(define_insn_and_split "*anddi_1_btr"
[(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
@@ -11589,36 +11605,46 @@ (define_split
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*andsi_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
- (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
- "and{l}\t{%2, %k0|%k0, %2}"
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands,
+ ix86_can_use_ndd_p (AND))"
+ "@
+ and{l}\t{%2, %k0|%k0, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*and<mode>_1"
- [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k")
- (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
- (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,L,k")))
+ [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,Ya,?k")
+ (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,r,qm,k")
+ (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,L,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (AND, <MODE>mode, operands,
+ ix86_can_use_ndd_p (AND))"
"@
and{<imodesuffix>}\t{%2, %0|%0, %2}
and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
#
#"
[(set (attr "isa")
- (cond [(eq_attr "alternative" "3")
+ (cond [(eq_attr "alternative" "2,3")
+ (const_string "apx_ndd")
+ (eq_attr "alternative" "5")
(if_then_else (eq_attr "mode" "SI")
(const_string "avx512bw")
(const_string "avx512f"))
]
(const_string "*")))
- (set_attr "type" "alu,alu,imovx,msklog")
- (set_attr "length_immediate" "*,*,0,*")
+ (set_attr "type" "alu,alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -11626,24 +11652,28 @@ (define_insn "*and<mode>_1"
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
+ (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
(define_insn "*andqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
- (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, QImode, operands)"
+ "ix86_binary_operator_ok (AND, QImode, operands,
+ ix86_can_use_ndd_p (AND))"
"@
and{b}\t{%2, %0|%0, %2}
and{b}\t{%2, %0|%0, %2}
and{l}\t{%k2, %k0|%k0, %k2}
+ and{b}\t{%2, %1, %0|%0, %1, %2}
+ and{b}\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "type" "alu,alu,alu,msklog")
+ [(set_attr "type" "alu,alu,alu,alu,alu,msklog")
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
- (and (eq_attr "alternative" "3")
+ (and (eq_attr "alternative" "5")
(match_test "!TARGET_AVX512DQ"))
(const_string "HI")
]
@@ -11683,7 +11713,10 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))"
+ || REGNO (operands[0]) != REGNO (operands[1]))
+ && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)
+ || UINTVAL (operands[2]) == GET_MODE_MASK (HImode)
+ || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))"
[(const_int 0)]
{
unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
@@ -11756,10 +11789,10 @@ (define_insn "*anddi_2"
[(set (reg FLAGS_REG)
(compare
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m"))
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m"))
(const_int 0)))
- (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r")
(and:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT
&& ix86_match_ccmode
@@ -11773,38 +11806,49 @@ (define_insn "*anddi_2"
&& (!CONST_INT_P (operands[2])
|| val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
? CCZmode : CCNOmode)
- && ix86_binary_operator_ok (AND, DImode, operands)"
+ && ix86_binary_operator_ok (AND, DImode, operands,
+ ix86_can_use_ndd_p (AND))"
"@
and{l}\t{%k2, %k0|%k0, %k2}
and{q}\t{%2, %0|%0, %2}
- and{q}\t{%2, %0|%0, %2}"
+ and{q}\t{%2, %0|%0, %2}
+ and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "SI,DI,DI")])
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
+ (set_attr "mode" "SI,DI,DI,SI,DI,DI")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*andsi_2_zext"
[(set (reg FLAGS_REG)
(compare (and:SI
- (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (AND, SImode, operands)"
- "and{l}\t{%2, %k0|%k0, %2}"
+ && ix86_binary_operator_ok (AND, SImode, operands,
+ ix86_can_use_ndd_p (AND))"
+ "@
+ and{l}\t{%2, %k0|%k0, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*andqi_2_maybe_si"
[(set (reg FLAGS_REG)
(compare (and:QI
- (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:QI 2 "general_operand" "qn,m,n"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r")
+ (match_operand:QI 2 "general_operand" "qn,m,n,rn,m"))
(const_int 0)))
- (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r")
(and:QI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (AND, QImode, operands)
+ "ix86_binary_operator_ok (AND, QImode, operands,
+ ix86_can_use_ndd_p (AND))
&& ix86_match_ccmode (insn,
CONST_INT_P (operands[2])
&& INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
@@ -11815,9 +11859,12 @@ (define_insn "*andqi_2_maybe_si"
operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
return "and{l}\t{%2, %k0|%k0, %2}";
}
+ if (which_alternative > 2)
+ return "and{b}\t{%2, %1, %0|%0, %1, %2}";
return "and{b}\t{%2, %0|%0, %2}";
}
[(set_attr "type" "alu")
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
@@ -11836,15 +11883,21 @@ (define_insn "*andqi_2_maybe_si"
(define_insn "*and<mode>_2"
[(set (reg FLAGS_REG)
(compare (and:SWI124
- (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>"))
+ (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r")
+ (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
(const_int 0)))
- (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(and:SWI124 (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
- "and{<imodesuffix>}\t{%2, %0|%0, %2}"
+ && ix86_binary_operator_ok (AND, <MODE>mode, operands,
+ ix86_can_use_ndd_p (AND))"
+ "@
+ and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,*,apx_ndd,apx_ndd")
(set_attr "mode" "<MODE>")])
(define_insn "*<code>qi_ext<mode>_0"
@@ -12057,6 +12110,7 @@ (define_insn_and_split "*<code>qi_ext<mode>_3"
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls. We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
+;; Don't do the splitting for APX NDD as NDD does not support *h registers.
(define_split
[(set (match_operand:SWI248 0 "QIreg_operand")
(and:SWI248 (match_operand:SWI248 1 "register_operand")
@@ -12064,7 +12118,8 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && !(~INTVAL (operands[2]) & ~(255 << 8))"
+ && !(~INTVAL (operands[2]) & ~(255 << 8))
+ && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
[(parallel
[(set (zero_extract:HI (match_dup 0)
(const_int 8)
@@ -12093,7 +12148,9 @@ (define_split
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& !(~INTVAL (operands[2]) & ~255)
- && !(INTVAL (operands[2]) & 128)"
+ && !(INTVAL (operands[2]) & 128)
+ && !(TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1]))"
[(parallel [(set (strict_low_part (match_dup 0))
(and:QI (match_dup 1)
(match_dup 2)))
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 9af72d1a46d..b34194b762d 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -85,6 +85,15 @@ F (int, not, ~)
F1 (int, not, ~)
F (long, not, ~)
F1 (long, not, ~)
+
+FOO (char, and, &)
+FOO1 (char, and, &)
+FOO (short, and, &)
+FOO1 (short, and, &)
+FOO (int, and, &)
+FOO1 (int, and, &)
+FOO (long, and, &)
+FOO1 (long, and, &)
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -95,3 +104,7 @@ F1 (long, not, ~)
/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%rdi\\), %al" 1 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/apx-spill_to_egprs-1.c b/gcc/testsuite/gcc.target/i386/apx-spill_to_egprs-1.c
index 290863d63a7..ecddcd6c14c 100644
--- a/gcc/testsuite/gcc.target/i386/apx-spill_to_egprs-1.c
+++ b/gcc/testsuite/gcc.target/i386/apx-spill_to_egprs-1.c
@@ -3,8 +3,8 @@
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r16d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r17d" } } */
+/* { dg-final { scan-assembler "(?:movl|rorx)\[ \t]+\[^\\n\\r\]*, %r16d" } } */
+/* { dg-final { scan-assembler "(?:movl|rorx)\[ \t]+\[^\\n\\r\]*, %r17d" } } */
/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r18d" } } */
/* { dg-final { scan-assembler "movq\[ \t]+\[^\\n\\r\]*, %r19" } } */
/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r20d" } } */
@@ -13,8 +13,8 @@
/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r23d" } } */
/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r24d" } } */
/* { dg-final { scan-assembler "addl\[ \t]+\[^\\n\\r\]*, %r25d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r26d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r27d" } } */
+/* { dg-final { scan-assembler "(?:movl|movbel)\[ \t]+\[^\\n\\r\]*, %r26d" } } */
+/* { dg-final { scan-assembler "(?:movl|movbel)\[ \t]+\[^\\n\\r\]*, %r27d" } } */
/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r28d" } } */
/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r29d" } } */
/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r30d" } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 11/16] [APX NDD] Support APX NDD for or/xor insn
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (9 preceding siblings ...)
2023-11-15 9:46 ` [PATCH 10/16] [APX NDD] Support APX NDD for and insn Hongyu Wang
@ 2023-11-15 9:47 ` Hongyu Wang
2023-11-15 9:47 ` [PATCH 12/16] [APX NDD] Support APX NDD for left shift insns Hongyu Wang
` (4 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:47 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu, Kong Lingling
From: Kong Lingling <lingling.kong@intel.com>
Similar to the AND insn, two splitters need to be adjusted to prevent
misoptimization for NDD OR/XOR.
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add IOR/XOR
support.
* config/i386/i386.md (<code><mode>3): Add NDD alternative and adjust
output templates.
(*<code><mode>_1): Likewise.
(*<code>qi_1): Likewise.
(*notxor<mode>_1): Likewise.
(*<code>si_1_zext): Likewise.
(*<code>si_1_zext_imm): Likewise.
(*notxorqi_1): Likewise.
(*<code><mode>_2): Likewise.
(*<code>si_2_zext): Likewise.
(*<code>si_2_zext_imm): Likewise.
(*<code><dwi>3_doubleword): Add NDD constraints, emit move for
optimized case if operands[0] != operands[1] or operands[3]
!= operands[4].
(define_split for QI highpart OR/XOR): Prohibit splitter to split NDD
form OR/XOR insn to <any_logic:code>qi_ext<mode>_3.
(define_split for QI strict_lowpart optimization): Prohibit splitter to
split NDD form OR/XOR insn to *<code><mode>3_1_slp.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add or and xor test.
---
gcc/config/i386/i386-expand.cc | 2 +
gcc/config/i386/i386.md | 180 +++++++++++++++---------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 26 ++++
3 files changed, 143 insertions(+), 65 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 662f687abc3..5f02d557a50 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1274,6 +1274,8 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
case NEG:
case NOT:
case AND:
+ case IOR:
+ case XOR:
return true;
default:
return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4bf0c16f401..cf9842d1a49 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12372,17 +12372,19 @@ (define_expand "<code><mode>3"
&& !x86_64_hilo_general_operand (operands[2], <MODE>mode))
operands[2] = force_reg (<MODE>mode, operands[2]);
- ix86_expand_binary_operator (<CODE>, <MODE>mode, operands);
+ ix86_expand_binary_operator (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>));
DONE;
})
(define_insn_and_split "*<code><dwi>3_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(any_or:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
"#"
"&& reload_completed"
[(const_int:DWIH 0)]
@@ -12394,20 +12396,29 @@ (define_insn_and_split "*<code><dwi>3_doubleword"
split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
if (operands[2] == const0_rtx)
- emit_insn_deleted_note_p = true;
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_insn_deleted_note_p = true;
+ }
else if (operands[2] == constm1_rtx)
{
if (<CODE> == IOR)
emit_move_insn (operands[0], constm1_rtx);
else
- ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0]);
+ ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0],
+ ix86_can_use_ndd_p (NOT));
}
else
- ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0]);
+ ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0],
+ ix86_can_use_ndd_p (<CODE>));
if (operands[5] == const0_rtx)
{
- if (emit_insn_deleted_note_p)
+ if (!rtx_equal_p (operands[3], operands[4]))
+ emit_move_insn (operands[3], operands[4]);
+ else if (emit_insn_deleted_note_p)
emit_note (NOTE_INSN_DELETED);
}
else if (operands[5] == constm1_rtx)
@@ -12415,37 +12426,44 @@ (define_insn_and_split "*<code><dwi>3_doubleword"
if (<CODE> == IOR)
emit_move_insn (operands[3], constm1_rtx);
else
- ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3]);
+ ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3],
+ ix86_can_use_ndd_p (NOT));
}
else
- ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3]);
+ ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3],
+ ix86_can_use_ndd_p (<CODE>));
DONE;
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
(any_or:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
- (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k")))
+ (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
+ (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
"@
<logic>{<imodesuffix>}\t{%2, %0|%0, %2}
<logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "isa" "*,*,<kmov_isa>")
- (set_attr "type" "alu, alu, msklog")
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
+ (set_attr "type" "alu, alu, alu, alu, msklog")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*notxor<mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
(not:SWI248
(xor:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
- (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k"))))
+ (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
+ (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k"))))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (XOR, <MODE>mode, operands,
+ ix86_can_use_ndd_p (XOR))"
"#"
"&& reload_completed"
[(parallel
@@ -12461,8 +12479,8 @@ (define_insn_and_split "*notxor<mode>_1"
DONE;
}
}
- [(set_attr "isa" "*,*,<kmov_isa>")
- (set_attr "type" "alu, alu, msklog")
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
+ (set_attr "type" "alu, alu, alu, alu, msklog")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*iordi_1_bts"
@@ -12550,44 +12568,56 @@ (define_insn_and_split "*xor2andn"
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*<code>si_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
- (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+ (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<logic>{l}\t{%2, %k0|%k0, %2}"
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{l}\t{%2, %k0|%k0, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*<code>si_1_zext_imm"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(any_or:DI
- (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
- (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,r"))
+ (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z,Z")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<logic>{l}\t{%2, %k0|%k0, %2}"
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{l}\t{%2, %k0|%k0, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*<code>qi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
- (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+ (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, QImode, operands)"
+ "ix86_binary_operator_ok (<CODE>, QImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
"@
<logic>{b}\t{%2, %0|%0, %2}
<logic>{b}\t{%2, %0|%0, %2}
<logic>{l}\t{%k2, %k0|%k0, %k2}
+ <logic>{b}\t{%2, %1, %0|%0, %1, %2}
+ <logic>{b}\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "isa" "*,*,*,avx512f")
- (set_attr "type" "alu,alu,alu,msklog")
+ [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
+ (set_attr "type" "alu,alu,alu,alu,alu,msklog")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
- (and (eq_attr "alternative" "3")
+ (and (eq_attr "alternative" "5")
(match_test "!TARGET_AVX512DQ"))
(const_string "HI")
]
@@ -12599,12 +12629,13 @@ (define_insn "*<code>qi_1"
(symbol_ref "true")))])
(define_insn_and_split "*notxorqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
(not:QI
- (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,k"))))
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (XOR, QImode, operands)"
+ "ix86_binary_operator_ok (XOR, QImode, operands,
+ ix86_can_use_ndd_p (XOR))"
"#"
"&& reload_completed"
[(parallel
@@ -12620,12 +12651,12 @@ (define_insn_and_split "*notxorqi_1"
DONE;
}
}
- [(set_attr "isa" "*,*,*,avx512f")
- (set_attr "type" "alu,alu,alu,msklog")
+ [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
+ (set_attr "type" "alu,alu,alu,alu,alu,msklog")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
- (and (eq_attr "alternative" "3")
+ (and (eq_attr "alternative" "5")
(match_test "!TARGET_AVX512DQ"))
(const_string "HI")
]
@@ -12673,44 +12704,59 @@ (define_split
(define_insn "*<code><mode>_2"
[(set (reg FLAGS_REG)
(compare (any_or:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(any_or:SWI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,*,apx_ndd,apx_ndd")
(set_attr "mode" "<MODE>")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
[(set (reg FLAGS_REG)
- (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+ (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<logic>{l}\t{%2, %k0|%k0, %2}"
+ && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{l}\t{%2, %k0|%k0, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*<code>si_2_zext_imm"
[(set (reg FLAGS_REG)
(compare (any_or:SI
- (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,rm")
+ (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z,Z"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<logic>{l}\t{%2, %k0|%k0, %2}"
+ && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{l}\t{%2, %k0|%k0, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*<code><mode>_3"
@@ -12731,6 +12777,7 @@ (define_insn "*<code><mode>_3"
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls. We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
+;; Don't do the splitting for APX NDD as NDD does not support *h registers.
(define_split
[(set (match_operand:SWI248 0 "QIreg_operand")
(any_or:SWI248 (match_operand:SWI248 1 "register_operand")
@@ -12738,7 +12785,8 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && !(INTVAL (operands[2]) & ~(255 << 8))"
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
[(parallel
[(set (zero_extract:HI (match_dup 0)
(const_int 8)
@@ -12776,7 +12824,9 @@ (define_split
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& !(INTVAL (operands[2]) & ~255)
- && (INTVAL (operands[2]) & 128)"
+ && (INTVAL (operands[2]) & 128)
+ && !(TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1]))"
[(parallel [(set (strict_low_part (match_dup 0))
(any_or:QI (match_dup 1)
(match_dup 2)))
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index b34194b762d..7541a41a01e 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -94,6 +94,24 @@ FOO (int, and, &)
FOO1 (int, and, &)
FOO (long, and, &)
FOO1 (long, and, &)
+
+FOO (char, or, |)
+FOO1 (char, or, |)
+FOO (short, or, |)
+FOO1 (short, or, |)
+FOO (int, or, |)
+FOO1 (int, or, |)
+FOO (long, or, |)
+FOO1 (long, or, |)
+
+FOO (char, xor, ^)
+FOO1 (char, xor, ^)
+FOO (short, xor, ^)
+FOO1 (short, xor, ^)
+FOO (int, xor, ^)
+FOO1 (int, xor, ^)
+FOO (long, xor, ^)
+FOO1 (long, xor, ^)
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -108,3 +126,11 @@ FOO1 (long, and, &)
/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "orb\[^\n\r]*1, \\(%rdi\\), %al" 2} } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 6 } } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "xorb\[^\n\r]*1, \\(%rdi\\), %al" 1 } } */
+/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 12/16] [APX NDD] Support APX NDD for left shift insns
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (10 preceding siblings ...)
2023-11-15 9:47 ` [PATCH 11/16] [APX NDD] Support APX NDD for or/xor insn Hongyu Wang
@ 2023-11-15 9:47 ` Hongyu Wang
2023-11-15 9:47 ` [PATCH 13/16] [APX NDD] Support APX NDD for right " Hongyu Wang
` (3 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:47 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu
For left shift, the TARGET_DOUBLE_WITH_ADD optimization turns a shift
left by 1 into an add of the operand to itself. The NDD form of add
cannot take two memory sources, so it would require the source operand
to be a register; for now we simply keep using the NDD form of the shift
instead of converting it to add.
The TARGET_SHIFT1 optimization omits the constant 1 shift count, but under
NDD this can produce an ambiguous mnemonic such as sal %ecx, %edx. The
assembler encodes that as the legacy shift sal %cl, %edx, whereas %ecx was
actually meant to be the NDD source operand. To avoid this, we emit $1
explicitly when operands[1] is the CX register.
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add ASHIFT.
* config/i386/i386.md (*ashl<mode>3_1): Extend with new
alternatives to support NDD, limit the new alternative to
generate sal only, and adjust output template for NDD.
(*ashlsi3_1_zext): Likewise.
(*ashlhi3_1): Likewise.
(*ashlqi3_1): Likewise.
(*ashl<mode>3_cmp): Likewise.
(*ashlsi3_cmp_zext): Likewise.
(*ashl<mode>3_cconly): Likewise.
(*ashl<mode>3_doubleword): Likewise.
(*ashl<dwi>3_doubleword_highpart): Adjust codegen for NDD.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add tests for sal.
---
gcc/config/i386/i386-expand.cc | 1 +
gcc/config/i386/i386.md | 194 ++++++++++++++++--------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 22 +++
3 files changed, 150 insertions(+), 67 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5f02d557a50..7e3080482a6 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1276,6 +1276,7 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
case AND:
case IOR:
case XOR:
+ case ASHIFT:
return true;
default:
return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index cf9842d1a49..a0e81545f17 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14103,13 +14103,14 @@ (define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
})
(define_insn "ashl<mode>3_doubleword"
- [(set (match_operand:DWI 0 "register_operand" "=&r")
- (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
- (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+ [(set (match_operand:DWI 0 "register_operand" "=&r,r")
+ (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n,r")
+ (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
(clobber (reg:CC FLAGS_REG))]
""
"#"
- [(set_attr "type" "multi")])
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "multi")])
(define_split
[(set (match_operand:DWI 0 "register_operand")
@@ -14149,11 +14150,14 @@ (define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
[(const_int 0)]
{
split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
+ bool use_ndd = ix86_can_use_ndd_p (ASHIFT)
+ && !rtx_equal_p (operands[3], operands[1]);
int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
- if (!rtx_equal_p (operands[3], operands[1]))
+ if (!rtx_equal_p (operands[3], operands[1]) || !use_ndd)
emit_move_insn (operands[3], operands[1]);
+ rtx op_tmp = use_ndd? operands[1] : operands[3];
if (bits > 0)
- emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits)));
+ emit_insn (gen_ashl<mode>3 (operands[3], op_tmp, GEN_INT (bits)));
ix86_expand_clear (operands[0]);
DONE;
})
@@ -14460,12 +14464,14 @@ (define_insn "*bmi2_ashl<mode>3_1"
(set_attr "mode" "<MODE>")])
(define_insn "*ashl<mode>3_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
- (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
+ (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (ASHIFT))"
{
+ bool use_ndd = (which_alternative == 4);
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -14480,18 +14486,24 @@ (define_insn "*ashl<mode>3_1"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "sal{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "sal{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "sal{<imodesuffix>}\t%0";
else
- return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,bmi2,<kmov_isa>")
+ [(set_attr "isa" "*,*,bmi2,<kmov_isa>,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
(eq_attr "alternative" "2")
(const_string "ishiftx")
+ (eq_attr "alternative" "4")
+ (const_string "ishift")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
@@ -14533,13 +14545,15 @@ (define_insn "*bmi2_ashlsi3_1_zext"
(set_attr "mode" "SI")])
(define_insn "*ashlsi3_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(zero_extend:DI
- (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
- (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands,
+ ix86_can_use_ndd_p (ASHIFT))"
{
+ bool use_ndd = (which_alternative == 3);
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -14552,18 +14566,24 @@ (define_insn "*ashlsi3_1_zext"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "sal{l}\t%k0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "sal{l}\t{%1, %k0|%k0, %1}"
+ : "sal{l}\t%k0";
else
- return "sal{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "sal{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set_attr "isa" "*,*,bmi2")
+ [(set_attr "isa" "*,*,bmi2,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
(eq_attr "alternative" "2")
(const_string "ishiftx")
+ (eq_attr "alternative" "3")
+ (const_string "ishift")
(and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 2 "const1_operand"))
(const_string "alu")
@@ -14593,12 +14613,14 @@ (define_split
"operands[2] = gen_lowpart (SImode, operands[2]);")
(define_insn "*ashlhi3_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
- (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
- (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+ "ix86_binary_operator_ok (ASHIFT, HImode, operands,
+ ix86_can_use_ndd_p (ASHIFT))"
{
+ bool use_ndd = (which_alternative == 3);
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -14611,18 +14633,24 @@ (define_insn "*ashlhi3_1"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "sal{w}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "sal{w}\t{%1, %0|%0, %1}"
+ : "sal{w}\t%0";
else
- return "sal{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{w}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,avx512f")
+ [(set_attr "isa" "*,*,avx512f,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
(eq_attr "alternative" "2")
(const_string "msklog")
+ (eq_attr "alternative" "3")
+ (const_string "ishift")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
@@ -14638,15 +14666,17 @@ (define_insn "*ashlhi3_1"
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "HI,SI,HI")])
+ (set_attr "mode" "HI,SI,HI,HI")])
(define_insn "*ashlqi3_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
- (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
- (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+ "ix86_binary_operator_ok (ASHIFT, QImode, operands,
+ ix86_can_use_ndd_p (ASHIFT))"
{
+ bool use_ndd = (which_alternative == 4);
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -14662,28 +14692,34 @@ (define_insn "*ashlqi3_1"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
{
if (get_attr_mode (insn) == MODE_SI)
- return "sal{l}\t%k0";
+ return use_ndd ? "sal{l}\t{%1, %k0|%k0, %1}"
+ : "sal{l}\t%k0";
else
return "sal{b}\t%0";
}
else
{
if (get_attr_mode (insn) == MODE_SI)
- return "sal{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "sal{l}\t{%2, %k0|%k0, %2}";
else
return "sal{b}\t{%2, %0|%0, %2}";
}
}
}
- [(set_attr "isa" "*,*,*,avx512dq")
+ [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "2")
(const_string "lea")
(eq_attr "alternative" "3")
(const_string "msklog")
+ (eq_attr "alternative" "4")
+ (const_string "ishift")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
@@ -14699,10 +14735,10 @@ (define_insn "*ashlqi3_1"
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "QI,SI,SI,QI")
+ (set_attr "mode" "QI,SI,SI,QI,SI")
;; Potential partial reg stall on alternative 1.
(set (attr "preferred_for_speed")
- (cond [(eq_attr "alternative" "1")
+ (cond [(eq_attr "alternative" "1,4")
(symbol_ref "!TARGET_PARTIAL_REG_STALL")]
(symbol_ref "true")))])
@@ -14797,10 +14833,10 @@ (define_split
(define_insn "*ashl<mode>3_cmp"
[(set (reg FLAGS_REG)
(compare
- (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
- (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
(ashift:SWI (match_dup 1) (match_dup 2)))]
"(optimize_function_for_size_p (cfun)
|| !TARGET_PARTIAL_FLAG_REG_STALL
@@ -14808,8 +14844,10 @@ (define_insn "*ashl<mode>3_cmp"
&& (TARGET_SHIFT1
|| (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
&& ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (ASHIFT))"
{
+ bool use_ndd = (which_alternative == 1);
switch (get_attr_type (insn))
{
case TYPE_ALU:
@@ -14818,14 +14856,21 @@ (define_insn "*ashl<mode>3_cmp"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "sal{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "sal{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "sal{<imodesuffix>}\t%0";
else
- return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
- (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+ [(set_attr "isa" "*,apx_ndd")
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "ishift")
+ (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
(const_string "alu")
@@ -14845,10 +14890,10 @@ (define_insn "*ashl<mode>3_cmp"
(define_insn "*ashlsi3_cmp_zext"
[(set (reg FLAGS_REG)
(compare
- (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,r")
(match_operand:QI 2 "const_1_to_31_operand"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT
&& (optimize_function_for_size_p (cfun)
@@ -14857,8 +14902,10 @@ (define_insn "*ashlsi3_cmp_zext"
&& (TARGET_SHIFT1
|| TARGET_DOUBLE_WITH_ADD)))
&& ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands,
+ ix86_can_use_ndd_p (ASHIFT))"
{
+ bool use_ndd = (which_alternative == 1);
switch (get_attr_type (insn))
{
case TYPE_ALU:
@@ -14867,14 +14914,20 @@ (define_insn "*ashlsi3_cmp_zext"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "sal{l}\t%k0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "sal{l}\t{%1, %k0|%k0, %1}"
+ : "sal{l}\t%k0";
else
- return "sal{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "sal{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set (attr "type")
- (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
+ [(set_attr "isa" "*,apx_ndd")
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "ishift")
+ (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 2 "const1_operand"))
(const_string "alu")
]
@@ -14893,10 +14946,10 @@ (define_insn "*ashlsi3_cmp_zext"
(define_insn "*ashl<mode>3_cconly"
[(set (reg FLAGS_REG)
(compare
- (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
- (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (ashift:SWI (match_operand:SWI 1 "register_operand" "0,r")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
(const_int 0)))
- (clobber (match_scratch:SWI 0 "=<r>"))]
+ (clobber (match_scratch:SWI 0 "=<r>,r"))]
"(optimize_function_for_size_p (cfun)
|| !TARGET_PARTIAL_FLAG_REG_STALL
|| (operands[2] == const1_rtx
@@ -14904,22 +14957,29 @@ (define_insn "*ashl<mode>3_cconly"
|| TARGET_DOUBLE_WITH_ADD)))
&& ix86_match_ccmode (insn, CCGOCmode)"
{
+ bool use_ndd = (which_alternative == 1);
switch (get_attr_type (insn))
{
case TYPE_ALU:
gcc_assert (operands[2] == const1_rtx);
return "add{<imodesuffix>}\t%0, %0";
- default:
+ default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "sal{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "sal{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "sal{<imodesuffix>}\t%0";
else
- return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
- (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+ [(set_attr "isa" "*,apx_ndd")
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "ishift")
+ (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
(const_string "alu")
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 7541a41a01e..481ec8b00a8 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -29,6 +29,16 @@ foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \
return c; \
}
+#define FOO3(TYPE, OP_NAME, OP, IMM) \
+TYPE \
+__attribute__ ((noipa)) \
+foo3_##OP_NAME##_##TYPE (TYPE a) \
+{ \
+ TYPE b = a OP IMM; \
+ return b; \
+}
+
+
#define F(TYPE, OP_NAME, OP) \
TYPE \
__attribute__ ((noipa)) \
@@ -112,6 +122,16 @@ FOO (int, xor, ^)
FOO1 (int, xor, ^)
FOO (long, xor, ^)
FOO1 (long, xor, ^)
+
+FOO (char, shl, <<)
+FOO3 (char, shl, <<, 7)
+FOO (short, shl, <<)
+FOO3 (short, shl, <<, 7)
+FOO (int, shl, <<)
+FOO3 (int, shl, <<, 7)
+FOO (long, shl, <<)
+FOO3 (long, shl, <<, 7)
+
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -134,3 +154,5 @@ FOO1 (long, xor, ^)
/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 13/16] [APX NDD] Support APX NDD for right shift insns
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (11 preceding siblings ...)
2023-11-15 9:47 ` [PATCH 12/16] [APX NDD] Support APX NDD for left shift insns Hongyu Wang
@ 2023-11-15 9:47 ` Hongyu Wang
2023-11-15 9:47 ` [PATCH 14/16] [APX NDD] Support APX NDD for rotate insns Hongyu Wang
` (2 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:47 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu
Similar to the left shift patterns, the right shift patterns must also
emit an explicit $1 shift count for the NDD form when operands[1] is CX_REG.
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add ASHIFTRT
and LSHIFTRT.
* config/i386/i386.md (ashr<mode>3_cvt): Extend with new
alternatives to support NDD, and adjust output templates.
(*ashrsi3_cvt_zext): Likewise.
(*ashr<mode>3_1): Likewise for SI/DI mode.
(*highpartdisi2): Likewise.
(*lshr<mode>3_1): Likewise.
(*<insn>si3_1_zext): Likewise.
(*ashr<mode>3_1): Likewise for QI/HI mode.
(*lshrqi3_1): Likewise.
(*lshrhi3_1): Likewise.
(*<insn><mode>3_cmp): Likewise.
(*<insn>si3_cmp_zext): Likewise.
(*<insn><mode>3_cconly): Likewise.
(<insn><mode>3_carry): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add l/ashiftrt tests.
---
gcc/config/i386/i386-expand.cc | 2 +
gcc/config/i386/i386.md | 265 +++++++++++++++---------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 24 +++
3 files changed, 191 insertions(+), 100 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 7e3080482a6..8e040346fbb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1277,6 +1277,8 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
case IOR:
case XOR:
case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
return true;
default:
return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a0e81545f17..3ff333d4a41 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15490,39 +15490,45 @@ (define_mode_attr cvt_mnemonic
[(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
(define_insn "ashr<mode>3_cvt"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
(ashiftrt:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "*a,0")
+ (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
(match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))]
"INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
&& (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
- && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+ && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (ASHIFTRT))"
"@
<cvt_mnemonic>
- sar{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "imovx,ishift")
- (set_attr "prefix_0f" "0,*")
- (set_attr "length_immediate" "0,*")
- (set_attr "modrm" "0,1")
+ sar{<imodesuffix>}\t{%2, %0|%0, %2}
+ sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd")
+ (set_attr "type" "imovx,ishift,ishift")
+ (set_attr "prefix_0f" "0,*,*")
+ (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,1,1")
(set_attr "mode" "<MODE>")])
(define_insn "*ashrsi3_cvt_zext"
- [(set (match_operand:DI 0 "register_operand" "=*d,r")
+ [(set (match_operand:DI 0 "register_operand" "=*d,r,r")
(zero_extend:DI
- (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0,r")
(match_operand:QI 2 "const_int_operand"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && INTVAL (operands[2]) == 31
&& (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
- && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands,
+ ix86_can_use_ndd_p (ASHIFTRT))"
"@
{cltd|cdq}
- sar{l}\t{%2, %k0|%k0, %2}"
- [(set_attr "type" "imovx,ishift")
- (set_attr "prefix_0f" "0,*")
- (set_attr "length_immediate" "0,*")
- (set_attr "modrm" "0,1")
+ sar{l}\t{%2, %k0|%k0, %2}
+ sar{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,*,apx_ndd")
+ (set_attr "type" "imovx,ishift,ishift")
+ (set_attr "prefix_0f" "0,*,*")
+ (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,1,1")
(set_attr "mode" "SI")])
(define_expand "@x86_shift<mode>_adj_3"
@@ -15564,13 +15570,15 @@ (define_insn "*bmi2_<insn><mode>3_1"
(set_attr "mode" "<MODE>")])
(define_insn "*ashr<mode>3_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
(ashiftrt:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (ASHIFTRT))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_ISHIFTX:
@@ -15578,14 +15586,18 @@ (define_insn "*ashr<mode>3_1"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "sar{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "sar{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "sar{<imodesuffix>}\t%0";
else
- return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,bmi2")
- (set_attr "type" "ishift,ishiftx")
+ [(set_attr "isa" "*,bmi2,apx_ndd")
+ (set_attr "type" "ishift,ishiftx,ishift")
(set (attr "length_immediate")
(if_then_else
(and (match_operand 2 "const1_operand")
@@ -15598,8 +15610,8 @@ (define_insn "*ashr<mode>3_1"
;; Specialization of *lshr<mode>3_1 below, extracting the SImode
;; highpart of a DI to be extracted, but allowing it to be clobbered.
(define_insn_and_split "*highpartdisi2"
- [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k") 0)
- (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,0,k")
+ [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k,r") 0)
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,0,k,r")
(const_int 32)))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
@@ -15618,16 +15630,20 @@ (define_insn_and_split "*highpartdisi2"
DONE;
}
operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-})
+}
+[(set_attr "isa" "*,*,*,apx_ndd")])
+
(define_insn "*lshr<mode>3_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
(lshiftrt:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (LSHIFTRT))"
{
+ bool use_ndd = (which_alternative == 3);
switch (get_attr_type (insn))
{
case TYPE_ISHIFTX:
@@ -15636,14 +15652,18 @@ (define_insn "*lshr<mode>3_1"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "shr{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "shr{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "shr{<imodesuffix>}\t%0";
else
- return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,bmi2,<kmov_isa>")
- (set_attr "type" "ishift,ishiftx,msklog")
+ [(set_attr "isa" "*,bmi2,<kmov_isa>,apx_ndd")
+ (set_attr "type" "ishift,ishiftx,msklog,ishift")
(set (attr "length_immediate")
(if_then_else
(and (and (match_operand 2 "const1_operand")
@@ -15676,13 +15696,15 @@ (define_insn "*bmi2_<insn>si3_1_zext"
(set_attr "mode" "SI")])
(define_insn "*<insn>si3_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
- (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
- (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
+ (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,r,cI"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_ISHIFTX:
@@ -15690,14 +15712,18 @@ (define_insn "*<insn>si3_1_zext"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<shift>{l}\t%k0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "<shift>{l}\t{%1, %k0|%k0, %1}"
+ : "<shift>{l}\t%k0";
else
- return "<shift>{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "<shift>{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set_attr "isa" "*,bmi2")
- (set_attr "type" "ishift,ishiftx")
+ [(set_attr "isa" "*,bmi2,apx_ndd")
+ (set_attr "type" "ishift,ishiftx,ishift")
(set (attr "length_immediate")
(if_then_else
(and (match_operand 2 "const1_operand")
@@ -15720,20 +15746,26 @@ (define_split
"operands[2] = gen_lowpart (SImode, operands[2]);")
(define_insn "*ashr<mode>3_1"
- [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+ [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
(ashiftrt:SWI12
- (match_operand:SWI12 1 "nonimmediate_operand" "0")
- (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+ (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
+ ix86_can_use_ndd_p (ASHIFTRT))"
{
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "sar{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(which_alternative && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return which_alternative ? "sar{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "sar{<imodesuffix>}\t%0";
else
- return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return which_alternative ? "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "ishift")
+ [(set_attr "isa" "*, apx_ndd")
+ (set_attr "type" "ishift")
(set (attr "length_immediate")
(if_then_else
(and (match_operand 2 "const1_operand")
@@ -15744,29 +15776,35 @@ (define_insn "*ashr<mode>3_1"
(set_attr "mode" "<MODE>")])
(define_insn "*lshrqi3_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r")
(lshiftrt:QI
- (match_operand:QI 1 "nonimmediate_operand" "0, k")
- (match_operand:QI 2 "nonmemory_operand" "cI,Wb")))
+ (match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "ix86_binary_operator_ok (LSHIFTRT, QImode, operands,
+ ix86_can_use_ndd_p (LSHIFTRT))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_ISHIFT:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "shr{b}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "shr{b}\t{%1, %0|%0, %1}"
+ : "shr{b}\t%0";
else
- return "shr{b}\t{%2, %0|%0, %2}";
+ return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}"
+ : "shr{b}\t{%2, %0|%0, %2}";
case TYPE_MSKLOG:
return "#";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "*,avx512dq")
- (set_attr "type" "ishift,msklog")
+ [(set_attr "isa" "*,avx512dq,apx_ndd")
+ (set_attr "type" "ishift,msklog,ishift")
(set (attr "length_immediate")
(if_then_else
(and (and (match_operand 2 "const1_operand")
@@ -15778,29 +15816,35 @@ (define_insn "*lshrqi3_1"
(set_attr "mode" "QI")])
(define_insn "*lshrhi3_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k")
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
(lshiftrt:HI
- (match_operand:HI 1 "nonimmediate_operand" "0, k")
- (match_operand:QI 2 "nonmemory_operand" "cI, Ww")))
+ (match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands,
+ ix86_can_use_ndd_p (LSHIFTRT))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_ISHIFT:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "shr{w}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "shr{w}\t{%1, %0|%0, %1}"
+ : "shr{w}\t%0";
else
- return "shr{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "shr{w}\t{%2, %0|%0, %2}";
case TYPE_MSKLOG:
return "#";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "*, avx512f")
- (set_attr "type" "ishift,msklog")
+ [(set_attr "isa" "*, avx512f, apx_ndd")
+ (set_attr "type" "ishift,msklog,ishift")
(set (attr "length_immediate")
(if_then_else
(and (and (match_operand 2 "const1_operand")
@@ -15853,25 +15897,31 @@ (define_insn "*<insn><mode>3_cmp"
[(set (reg FLAGS_REG)
(compare
(any_shiftrt:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "0")
- (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
(any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
"(optimize_function_for_size_p (cfun)
|| !TARGET_PARTIAL_FLAG_REG_STALL
|| (operands[2] == const1_rtx
&& TARGET_SHIFT1))
&& ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
{
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<shift>{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(which_alternative && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return which_alternative ? "<shift>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "<shift>{<imodesuffix>}\t%0";
else
- return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return which_alternative ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "ishift")
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "ishift")
(set (attr "length_immediate")
(if_then_else
(and (match_operand 2 "const1_operand")
@@ -15884,10 +15934,10 @@ (define_insn "*<insn><mode>3_cmp"
(define_insn "*<insn>si3_cmp_zext"
[(set (reg FLAGS_REG)
(compare
- (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0,r")
(match_operand:QI 2 "const_1_to_31_operand"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT
&& (optimize_function_for_size_p (cfun)
@@ -15895,15 +15945,20 @@ (define_insn "*<insn>si3_cmp_zext"
|| (operands[2] == const1_rtx
&& TARGET_SHIFT1))
&& ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
{
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<shift>{l}\t%k0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(which_alternative && REGNO (operands[1]) == CX_REG))
+ return which_alternative ? "<shift>{l}\t{%1, %k0|%k0, %1}"
+ : "<shift>{l}\t%k0";
else
- return "<shift>{l}\t{%2, %k0|%k0, %2}";
+ return which_alternative ? "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "<shift>{l}\t{%2, %k0|%k0, %2}";
}
- [(set_attr "type" "ishift")
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "ishift")
(set (attr "length_immediate")
(if_then_else
(and (match_operand 2 "const1_operand")
@@ -15917,10 +15972,10 @@ (define_insn "*<insn><mode>3_cconly"
[(set (reg FLAGS_REG)
(compare
(any_shiftrt:SWI
- (match_operand:SWI 1 "register_operand" "0")
- (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (match_operand:SWI 1 "register_operand" "0,r")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
(const_int 0)))
- (clobber (match_scratch:SWI 0 "=<r>"))]
+ (clobber (match_scratch:SWI 0 "=<r>,r"))]
"(optimize_function_for_size_p (cfun)
|| !TARGET_PARTIAL_FLAG_REG_STALL
|| (operands[2] == const1_rtx
@@ -15928,12 +15983,18 @@ (define_insn "*<insn><mode>3_cconly"
&& ix86_match_ccmode (insn, CCGOCmode)"
{
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<shift>{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(which_alternative && REGNO (operands[1]) == CX_REG))
+ return which_alternative
+ ? "<shift>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "<shift>{<imodesuffix>}\t%0";
else
- return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return which_alternative
+ ? "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "ishift")
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "ishift")
(set (attr "length_immediate")
(if_then_else
(and (match_operand 2 "const1_operand")
@@ -16537,18 +16598,22 @@ (define_insn "rcrdi2"
;; Versions of sar and shr that set the carry flag.
(define_insn "<insn><mode>3_carry"
[(set (reg:CCC FLAGS_REG)
- (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "register_operand" "0")
+ (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "register_operand" "0,r")
(const_int 1))
(const_int 0)] UNSPEC_CC_NE))
- (set (match_operand:SWI48 0 "register_operand" "=r")
+ (set (match_operand:SWI48 0 "register_operand" "=r,r")
(any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
""
{
- if (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- return "<shift>{<imodesuffix>}\t%0";
- return "<shift>{<imodesuffix>}\t{1, %0|%0, 1}";
+ if ((TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(which_alternative && REGNO (operands[1]) == CX_REG))
+ return which_alternative ? "<shift>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "<shift>{<imodesuffix>}\t%0";
+ return which_alternative ? "<shift>{<imodesuffix>}\t{$1, %1, %0|%0, %1, 1}"
+ : "<shift>{<imodesuffix>}\t{$1, %0|%0, 1}";
}
- [(set_attr "type" "ishift1")
+ [(set_attr "isa" "*, apx_ndd")
+ (set_attr "type" "ishift1")
(set (attr "length_immediate")
(if_then_else
(ior (match_test "TARGET_SHIFT1")
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 481ec8b00a8..28c0df72988 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -2,6 +2,8 @@
/* { dg-options "-mapxf -O2" } */
/* { dg-final { scan-assembler-not "movl"} } */
+#include <stdint.h>
+
#define FOO(TYPE, OP_NAME, OP) \
TYPE \
__attribute__ ((noipa)) \
@@ -132,6 +134,24 @@ FOO3 (int, shl, <<, 7)
FOO (long, shl, <<)
FOO3 (long, shl, <<, 7)
+FOO (char, sar, >>)
+FOO3 (char, sar, >>, 7)
+FOO (short, sar, >>)
+FOO3 (short, sar, >>, 7)
+FOO (int, sar, >>)
+FOO3 (int, sar, >>, 7)
+FOO (long, sar, >>)
+FOO3 (long, sar, >>, 7)
+
+FOO (uint8_t, shr, >>)
+FOO3 (uint8_t, shr, >>, 7)
+FOO (uint16_t, shr, >>)
+FOO3 (uint16_t, shr, >>, 7)
+FOO (uint32_t, shr, >>)
+FOO3 (uint32_t, shr, >>, 7)
+FOO (uint64_t, shr, >>)
+FOO3 (uint64_t, shr, >>, 7)
+
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -156,3 +176,7 @@ FOO3 (long, shl, <<, 7)
/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 14/16] [APX NDD] Support APX NDD for rotate insns
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (12 preceding siblings ...)
2023-11-15 9:47 ` [PATCH 13/16] [APX NDD] Support APX NDD for right " Hongyu Wang
@ 2023-11-15 9:47 ` Hongyu Wang
2023-11-15 9:47 ` [PATCH 15/16] [APX NDD] Support APX NDD for shld/shrd insns Hongyu Wang
2023-11-15 9:47 ` [PATCH 16/16] [APX NDD] Support APX NDD for cmove insns Hongyu Wang
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:47 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add ROTATE
and ROTATERT.
* config/i386/i386.md (*<insn><mode>3_1): Extend with a new
alternative to support NDD for SI/DI rotate, and adjust output
template.
(*<insn>si3_1_zext): Likewise.
(*<insn><mode>3_1): Likewise for QI/HI modes.
(rcrsi2): Likewise.
(rcrdi2): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add test for left/right rotate.
---
gcc/config/i386/i386-expand.cc | 2 +
gcc/config/i386/i386.md | 91 ++++++++++++++++---------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 20 ++++++
3 files changed, 80 insertions(+), 33 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 8e040346fbb..ab6f14485d6 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1279,6 +1279,8 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT:
return true;
default:
return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3ff333d4a41..760c0d32f4d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16362,13 +16362,15 @@ (define_insn "*bmi2_rorx<mode>3_1"
(set_attr "mode" "<MODE>")])
(define_insn "*<insn><mode>3_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
(any_rotate:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_ROTATEX:
@@ -16376,14 +16378,18 @@ (define_insn "*<insn><mode>3_1"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<rotate>{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "<rotate>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "<rotate>{<imodesuffix>}\t%0";
else
- return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,bmi2")
- (set_attr "type" "rotate,rotatex")
+ [(set_attr "isa" "*,bmi2,apx_ndd")
+ (set_attr "type" "rotate,rotatex,rotate")
(set (attr "preferred_for_size")
(cond [(eq_attr "alternative" "0")
(symbol_ref "true")]
@@ -16433,13 +16439,14 @@ (define_insn "*bmi2_rorxsi3_1_zext"
(set_attr "mode" "SI")])
(define_insn "*<insn>si3_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
- (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
- (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
+ (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,I,cI"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
{
+ bool use_ndd = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_ROTATEX:
@@ -16447,14 +16454,18 @@ (define_insn "*<insn>si3_1_zext"
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<rotate>{l}\t%k0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(use_ndd && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return use_ndd ? "<rotate>{l}\t{%1, %k0|%k0, %1}"
+ : "<rotate>{l}\t%k0";
else
- return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "<rotate>{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "<rotate>{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set_attr "isa" "*,bmi2")
- (set_attr "type" "rotate,rotatex")
+ [(set_attr "isa" "*,bmi2,apx_ndd")
+ (set_attr "type" "rotate,rotatex,rotate")
(set (attr "preferred_for_size")
(cond [(eq_attr "alternative" "0")
(symbol_ref "true")]
@@ -16498,19 +16509,27 @@ (define_split
(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
(define_insn "*<insn><mode>3_1"
- [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
- (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
- (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+ [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
+ (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
{
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<rotate>{<imodesuffix>}\t%0";
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !(which_alternative && REG_P (operands[1])
+ && REGNO (operands[1]) == CX_REG))
+ return which_alternative
+ ? "<rotate>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ : "<rotate>{<imodesuffix>}\t%0";
else
- return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return which_alternative
+ ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "rotate")
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "rotate")
(set (attr "length_immediate")
(if_then_else
(and (match_operand 2 "const1_operand")
@@ -16567,31 +16586,37 @@ (define_split
;; Rotations through carry flag
(define_insn "rcrsi2"
- [(set (match_operand:SI 0 "register_operand" "=r")
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
(plus:SI
- (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,r")
(const_int 1))
(ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
(const_int 31))))
(clobber (reg:CC FLAGS_REG))]
""
- "rcr{l}\t%0"
- [(set_attr "type" "ishift1")
+ "@
+ rcr{l}\t%0
+ rcr{l}\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "ishift1")
(set_attr "memory" "none")
(set_attr "length_immediate" "0")
(set_attr "mode" "SI")])
(define_insn "rcrdi2"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(plus:DI
- (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,r")
(const_int 1))
(ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
(const_int 63))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
- "rcr{q}\t%0"
- [(set_attr "type" "ishift1")
+ "@
+ rcr{q}\t%0
+ rcr{q}\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,apx_ndd")
+ (set_attr "type" "ishift1")
(set_attr "length_immediate" "0")
(set_attr "mode" "DI")])
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 28c0df72988..b8b70511023 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -40,6 +40,14 @@ foo3_##OP_NAME##_##TYPE (TYPE a) \
return b; \
}
+#define FOO4(TYPE, OP_NAME, OP1, OP2, IMM1) \
+TYPE \
+__attribute__ ((noipa)) \
+foo4_##OP_NAME##_##TYPE (TYPE a) \
+{ \
+ TYPE b = (a OP1 IMM1 | a OP2 (8 * sizeof(TYPE) - IMM1)); \
+ return b; \
+}
#define F(TYPE, OP_NAME, OP) \
TYPE \
@@ -152,6 +160,16 @@ FOO3 (uint32_t, shr, >>, 7)
FOO (uint64_t, shr, >>)
FOO3 (uint64_t, shr, >>, 7)
+FOO4 (uint8_t, ror, >>, <<, 1)
+FOO4 (uint16_t, ror, >>, <<, 1)
+FOO4 (uint32_t, ror, >>, <<, 1)
+FOO4 (uint64_t, ror, >>, <<, 1)
+
+FOO4 (uint8_t, rol, <<, >>, 1)
+FOO4 (uint16_t, rol, <<, >>, 1)
+FOO4 (uint32_t, rol, <<, >>, 1)
+FOO4 (uint64_t, rol, <<, >>, 1)
+
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -180,3 +198,5 @@ FOO3 (uint64_t, shr, >>, 7)
/* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "ror(?:b|l|w|q)\[^\n\r]%(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "rol(?:b|l|w|q)\[^\n\r]%(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 15/16] [APX NDD] Support APX NDD for shld/shrd insns
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (13 preceding siblings ...)
2023-11-15 9:47 ` [PATCH 14/16] [APX NDD] Support APX NDD for rotate insns Hongyu Wang
@ 2023-11-15 9:47 ` Hongyu Wang
2023-11-15 9:47 ` [PATCH 16/16] [APX NDD] Support APX NDD for cmove insns Hongyu Wang
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:47 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu
For shld/shrd insns, the old patterns use match_dup 0 as the shift source and
+r*m as its constraint. To support NDD, add new define_insns that handle the NDD
form, with a separate input operand and a destination operand fixed to a register.
gcc/ChangeLog:
* config/i386/i386.md (x86_64_shld_ndd): New define_insn.
(x86_64_shld_ndd_1): Likewise.
(*x86_64_shld_ndd_2): Likewise.
(x86_shld_ndd): Likewise.
(x86_shld_ndd_1): Likewise.
(*x86_shld_ndd_2): Likewise.
(x86_64_shrd_ndd): Likewise.
(x86_64_shrd_ndd_1): Likewise.
(*x86_64_shrd_ndd_2): Likewise.
(x86_shrd_ndd): Likewise.
(x86_shrd_ndd_1): Likewise.
(*x86_shrd_ndd_2): Likewise.
(*x86_64_shld_shrd_1_nozext): Adjust codegen under TARGET_APX_NDD.
(*x86_shld_shrd_1_nozext): Likewise.
(*x86_64_shrd_shld_1_nozext): Likewise.
(*x86_shrd_shld_1_nozext): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd-shld-shrd.c: New test.
---
gcc/config/i386/i386.md | 323 +++++++++++++++++-
.../gcc.target/i386/apx-ndd-shld-shrd.c | 24 ++
2 files changed, 345 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 760c0d32f4d..2e3d37d08b0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14183,6 +14183,24 @@ (define_insn "x86_64_shld"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shld_ndd"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+ (const_int 63)))
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r"))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3) (const_int 63)))) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD"
+ "shld{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")])
+
(define_insn "x86_64_shld_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0)
@@ -14204,6 +14222,24 @@ (define_insn "x86_64_shld_1"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shld_ndd_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 3 "const_0_to_63_operand"))
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r"))
+ (match_operand:QI 4 "const_0_to_255_operand")) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+ "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "1")])
+
+
(define_insn_and_split "*x86_64_shld_shrd_1_nozext"
[(set (match_operand:DI 0 "nonimmediate_operand")
(ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
@@ -14229,6 +14265,23 @@ (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
operands[4] = force_reg (DImode, operands[4]);
emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
}
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
else
{
operands[1] = force_reg (DImode, operands[1]);
@@ -14261,6 +14314,33 @@ (define_insn_and_split "*x86_64_shld_2"
(const_int 63)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
+(define_insn_and_split "*x86_64_shld_ndd_2"
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "nonmemory_operand"))
+ (lshiftrt:DI (match_operand:DI 2 "register_operand")
+ (minus:QI (const_int 64) (match_dup 3)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 4)
+ (ior:DI (ashift:DI (match_dup 1)
+ (and:QI (match_dup 3) (const_int 63)))
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI (match_dup 2))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3)
+ (const_int 63)))) 0)))
+ (clobber (reg:CC FLAGS_REG))
+ (set (match_dup 0) (match_dup 4))])]
+{
+ operands[4] = gen_reg_rtx (DImode);
+ emit_move_insn (operands[4], operands[0]);
+})
+
(define_insn "x86_shld"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0)
@@ -14283,6 +14363,24 @@ (define_insn "x86_shld"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shld_ndd"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+ (const_int 31)))
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3) (const_int 31)))) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_APX_NDD"
+ "shld{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+
(define_insn "x86_shld_1"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0)
@@ -14304,6 +14402,24 @@ (define_insn "x86_shld_1"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shld_ndd_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 3 "const_0_to_31_operand"))
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:QI 4 "const_0_to_63_operand")) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_APX_NDD &&
+ INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
+ "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "SI")])
+
+
(define_insn_and_split "*x86_shld_shrd_1_nozext"
[(set (match_operand:SI 0 "nonimmediate_operand")
(ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
@@ -14328,7 +14444,24 @@ (define_insn_and_split "*x86_shld_shrd_1_nozext"
operands[4] = force_reg (SImode, operands[4]);
emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
}
- else
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
{
operands[1] = force_reg (SImode, operands[1]);
rtx tmp = gen_reg_rtx (SImode);
@@ -14360,6 +14493,33 @@ (define_insn_and_split "*x86_shld_2"
(const_int 31)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
+(define_insn_and_split "*x86_shld_ndd_2"
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "nonmemory_operand"))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand")
+ (minus:QI (const_int 32) (match_dup 3)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 4)
+ (ior:SI (ashift:SI (match_dup 1)
+ (and:QI (match_dup 3) (const_int 31)))
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI (match_dup 2))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3)
+ (const_int 31)))) 0)))
+ (clobber (reg:CC FLAGS_REG))
+ (set (match_dup 0) (match_dup 4))])]
+{
+ operands[4] = gen_reg_rtx (SImode);
+ emit_move_insn (operands[4], operands[0]);
+})
+
(define_expand "@x86_shift<mode>_adj_1"
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
@@ -15308,6 +15468,24 @@ (define_insn "x86_64_shrd"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shrd_ndd"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
+ (const_int 63)))
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r"))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3) (const_int 63)))) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD"
+ "shrd{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+
(define_insn "x86_64_shrd_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (lshiftrt:DI (match_dup 0)
@@ -15329,6 +15507,24 @@ (define_insn "x86_64_shrd_1"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shrd_ndd_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 3 "const_0_to_63_operand"))
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r"))
+ (match_operand:QI 4 "const_0_to_255_operand")) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
+ "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "DI")])
+
+
(define_insn_and_split "*x86_64_shrd_shld_1_nozext"
[(set (match_operand:DI 0 "nonimmediate_operand")
(ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
@@ -15354,6 +15550,23 @@ (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
operands[4] = force_reg (DImode, operands[4]);
emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3], operands[2]));
}
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
else
{
operands[1] = force_reg (DImode, operands[1]);
@@ -15386,6 +15599,33 @@ (define_insn_and_split "*x86_64_shrd_2"
(const_int 63)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
+(define_insn_and_split "*x86_64_shrd_ndd_2"
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "nonmemory_operand"))
+ (ashift:DI (match_operand:DI 2 "register_operand")
+ (minus:QI (const_int 64) (match_dup 3)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 4)
+ (ior:DI (lshiftrt:DI (match_dup 1)
+ (and:QI (match_dup 3) (const_int 63)))
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI (match_dup 2))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3)
+ (const_int 63)))) 0)))
+ (clobber (reg:CC FLAGS_REG))
+ (set (match_dup 0) (match_dup 4))])]
+{
+ operands[4] = gen_reg_rtx (DImode);
+ emit_move_insn (operands[4], operands[0]);
+})
+
(define_insn "x86_shrd"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0)
@@ -15408,6 +15648,23 @@ (define_insn "x86_shrd"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shrd_ndd"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
+ (const_int 31)))
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3) (const_int 31)))) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_APX_NDD"
+ "shrd{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
(define_insn "x86_shrd_1"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0)
@@ -15429,6 +15686,24 @@ (define_insn "x86_shrd_1"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shrd_ndd_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 3 "const_0_to_31_operand"))
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:QI 4 "const_0_to_63_operand")) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_APX_NDD
+ && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
+ "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "SI")])
+
+
(define_insn_and_split "*x86_shrd_shld_1_nozext"
[(set (match_operand:SI 0 "nonimmediate_operand")
(ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
@@ -15453,7 +15728,24 @@ (define_insn_and_split "*x86_shrd_shld_1_nozext"
operands[4] = force_reg (SImode, operands[4]);
emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3], operands[2]));
}
- else
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_shld_ndd_1 (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
{
operands[1] = force_reg (SImode, operands[1]);
rtx tmp = gen_reg_rtx (SImode);
@@ -15485,6 +15777,33 @@ (define_insn_and_split "*x86_shrd_2"
(const_int 31)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
+(define_insn_and_split "*x86_shrd_ndd_2"
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "nonmemory_operand"))
+ (ashift:SI (match_operand:SI 2 "register_operand")
+ (minus:QI (const_int 32) (match_dup 3)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 4)
+ (ior:SI (lshiftrt:SI (match_dup 1)
+ (and:QI (match_dup 3) (const_int 31)))
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI (match_dup 2))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3)
+ (const_int 31)))) 0)))
+ (clobber (reg:CC FLAGS_REG))
+ (set (match_dup 0) (match_dup 4))])]
+{
+ operands[4] = gen_reg_rtx (SImode);
+ emit_move_insn (operands[4], operands[0]);
+})
+
;; Base name for insn mnemonic.
(define_mode_attr cvt_mnemonic
[(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c b/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c
new file mode 100644
index 00000000000..87068ea31aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -Wno-shift-count-overflow -m64 -mapxf" } */
+/* { dg-final { scan-assembler-times {(?n)shld[ql]?[\t ]*\$2} 4 } } */
+/* { dg-final { scan-assembler-times {(?n)shrd[ql]?[\t ]*\$2} 4 } } */
+
+typedef unsigned long u64;
+typedef unsigned int u32;
+
+long a;
+int c;
+const char n = 2;
+
+long test64r (long e) { long t = ((u64)a >> n) | (e << (64 - n)); return t;}
+long test64l (u64 e) { long t = (a << n) | (e >> (64 - n)); return t;}
+int test32r (int f) { int t = ((u32)c >> n) | (f << (32 - n)); return t; }
+int test32l (u32 f) { int t = (c << n) | (f >> (32 - n)); return t; }
+
+u64 ua;
+u32 uc;
+
+u64 testu64l (u64 ue) { u64 ut = (ua << n) | (ue >> (64 - n)); return ut; }
+u64 testu64r (u64 ue) { u64 ut = (ua >> n) | (ue << (64 - n)); return ut; }
+u32 testu32l (u32 uf) { u32 ut = (uc << n) | (uf >> (32 - n)); return ut; }
+u32 testu32r (u32 uf) { u32 ut = (uc >> n) | (uf << (32 - n)); return ut; }
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 16/16] [APX NDD] Support APX NDD for cmove insns
2023-11-15 9:46 [PATCH 00/16] Support Intel APX NDD Hongyu Wang
` (14 preceding siblings ...)
2023-11-15 9:47 ` [PATCH 15/16] [APX NDD] Support APX NDD for shld/shrd insns Hongyu Wang
@ 2023-11-15 9:47 ` Hongyu Wang
15 siblings, 0 replies; 17+ messages in thread
From: Hongyu Wang @ 2023-11-15 9:47 UTC (permalink / raw)
To: gcc-patches; +Cc: ubizjak, hongtao.liu
gcc/ChangeLog:
* config/i386/i386.md (*mov<mode>cc_noc): Extend with new constraints
to support NDD.
(*movsicc_noc_zext): Likewise.
(*movsicc_noc_zext_1): Likewise.
(*movqicc_noc): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd-cmov.c: New test.
---
gcc/config/i386/i386.md | 48 ++++++++++++--------
gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c | 16 +++++++
2 files changed, 45 insertions(+), 19 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2e3d37d08b0..2ae9aaf59fb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -24119,47 +24119,56 @@ (define_split
(neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
(define_insn "*mov<mode>cc_noc"
- [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+ [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
- (match_operand:SWI248 2 "nonimmediate_operand" "rm,0")
- (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))]
+ (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
+ (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))]
"TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
"@
cmov%O2%C1\t{%2, %0|%0, %2}
- cmov%O2%c1\t{%3, %0|%0, %3}"
- [(set_attr "type" "icmov")
+ cmov%O2%c1\t{%3, %0|%0, %3}
+ cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
+ cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "icmov")
(set_attr "mode" "<MODE>")])
(define_insn "*movsicc_noc_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(if_then_else:DI (match_operator 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
(zero_extend:DI
- (match_operand:SI 2 "nonimmediate_operand" "rm,0"))
+ (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r"))
(zero_extend:DI
- (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
+ (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
"TARGET_64BIT
&& TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
"@
cmov%O2%C1\t{%2, %k0|%k0, %2}
- cmov%O2%c1\t{%3, %k0|%k0, %3}"
- [(set_attr "type" "icmov")
+ cmov%O2%c1\t{%3, %k0|%k0, %3}
+ cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
+ cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "icmov")
(set_attr "mode" "SI")])
(define_insn "*movsicc_noc_zext_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r")
(zero_extend:DI
(if_then_else:SI (match_operator 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 2 "nonimmediate_operand" "rm,0")
- (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
+ (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r")
+ (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
"TARGET_64BIT
&& TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
"@
cmov%O2%C1\t{%2, %k0|%k0, %2}
- cmov%O2%c1\t{%3, %k0|%k0, %3}"
- [(set_attr "type" "icmov")
+ cmov%O2%c1\t{%3, %k0|%k0, %3}
+ cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
+ cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "icmov")
(set_attr "mode" "SI")])
@@ -24184,14 +24193,15 @@ (define_split
})
(define_insn "*movqicc_noc"
- [(set (match_operand:QI 0 "register_operand" "=r,r")
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r")
(if_then_else:QI (match_operator 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
- (match_operand:QI 2 "register_operand" "r,0")
- (match_operand:QI 3 "register_operand" "0,r")))]
+ (match_operand:QI 2 "register_operand" "r,0,r")
+ (match_operand:QI 3 "register_operand" "0,r,r")))]
"TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
"#"
- [(set_attr "type" "icmov")
+ [(set_attr "isa" "*,*,apx_ndd")
+ (set_attr "type" "icmov")
(set_attr "mode" "QI")])
(define_split
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c b/gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c
new file mode 100644
index 00000000000..459dc965342
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -m64 -mapxf" } */
+/* { dg-final { scan-assembler-times "cmove\[^\n\r]*, %eax" 1 } } */
+/* { dg-final { scan-assembler-times "cmovge\[^\n\r]*, %eax" 1 } } */
+
+unsigned int c[4];
+
+unsigned long long foo1 (int a, unsigned int b)
+{
+ return a ? b : c[1];
+}
+
+unsigned int foo3 (int a, int b, unsigned int c, unsigned int d)
+{
+ return a < b ? c : d;
+}
--
2.31.1
^ permalink raw reply [flat|nested] 17+ messages in thread