public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-6255] [APX NDD] Support TImode shift for NDD
@ 2023-12-07  1:40 Hongyu Wang
  0 siblings, 0 replies; only message in thread
From: Hongyu Wang @ 2023-12-07  1:40 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3ba505c7b17a208b1c546b7a974a02e8003b60ef

commit r14-6255-g3ba505c7b17a208b1c546b7a974a02e8003b60ef
Author: Hongyu Wang <hongyu.wang@intel.com>
Date:   Sat Dec 2 12:55:59 2023 +0800

    [APX NDD] Support TImode shift for NDD
    
    TImode shifts are split by splitter functions that assume operands[0]
    and operands[1] are the same.  For the NDD alternative that assumption
    may not hold, so add split functions that emit NDD-form instructions;
    these omit the handling needed for splitting on !64bit targets.
    
    Although the NDD form allows a memory source, the post-reload splitter has
    no extra register available for the NDD-form shifts, especially shld/shrd.
    So only accept the register alternative for the shift source under NDD.
    
    gcc/ChangeLog:
    
            * config/i386/i386-expand.cc (ix86_split_ashl_ndd): New
            function to split NDD form lshift.
            (ix86_split_rshift_ndd): Likewise for l/ashiftrt.
            * config/i386/i386-protos.h (ix86_split_ashl_ndd): New
            prototype.
            (ix86_split_rshift_ndd): Likewise.
            * config/i386/i386.md (ashl<mode>3_doubleword): Add NDD
            alternative, call ndd split function when operands[0]
            not equal to operands[1].
            (define_split for doubleword lshift): Likewise.
            (define_peephole for doubleword lshift): Likewise.
            (<insn><mode>3_doubleword): Likewise for l/ashiftrt.
            (define_split for doubleword l/ashiftrt): Likewise.
            (define_peephole for doubleword l/ashiftrt): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/apx-ndd-ti-shift.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.cc                   | 136 +++++++++++++++++++++++
 gcc/config/i386/i386-protos.h                    |   2 +
 gcc/config/i386/i386.md                          |  56 ++++++++--
 gcc/testsuite/gcc.target/i386/apx-ndd-ti-shift.c |  91 +++++++++++++++
 4 files changed, 273 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index d4bbd33ce07..a53d69d5400 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -6678,6 +6678,142 @@ ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
     }
 }
 
+/* Split a TImode ashl into DImode insns when dest != src (APX NDD).  */
+void
+ix86_split_ashl_ndd (rtx *operands, rtx scratch)
+{
+  gcc_assert (TARGET_APX_NDD);
+  int half_width = GET_MODE_BITSIZE (TImode) >> 1;
+
+  rtx low[2], high[2];
+  int count;
+  /* low[i]/high[i] are the DImode halves of operands[i], i = 0, 1.  */
+  split_double_mode (TImode, operands, 2, low, high);
+  if (CONST_INT_P (operands[2]))
+    {
+      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (TImode) - 1);
+
+      if (count >= half_width)
+	{
+	  count = count - half_width;
+	  if (count == 0)
+	    {
+	      if (!rtx_equal_p (high[0], low[1]))
+		emit_move_insn (high[0], low[1]);
+	    }
+	  else if (count == 1)
+	    emit_insn (gen_adddi3 (high[0], low[1], low[1]));
+	  else
+	    emit_insn (gen_ashldi3 (high[0], low[1], GEN_INT (count)));
+
+	  ix86_expand_clear (low[0]);
+	}
+      else if (count == 1)
+	{
+	  rtx x3 = gen_rtx_REG (CCCmode, FLAGS_REG);
+	  rtx x4 = gen_rtx_LTU (TImode, x3, const0_rtx);
+	  emit_insn (gen_add3_cc_overflow_1 (DImode, low[0],
+					     low[1], low[1]));
+	  emit_insn (gen_add3_carry (DImode, high[0], high[1], high[1],
+				     x3, x4));
+	}
+      else
+	{
+	  emit_insn (gen_x86_64_shld_ndd (high[0], high[1], low[1],
+					  GEN_INT (count)));
+	  emit_insn (gen_ashldi3 (low[0], low[1], GEN_INT (count)));
+	}
+    }
+  else
+    {
+      emit_insn (gen_x86_64_shld_ndd (high[0], high[1], low[1],
+				      operands[2]));
+      emit_insn (gen_ashldi3 (low[0], low[1], operands[2]));
+      if (TARGET_CMOVE && scratch)
+	{
+	  ix86_expand_clear (scratch);
+	  emit_insn (gen_x86_shift_adj_1
+		     (DImode, high[0], low[0], operands[2], scratch));
+	}
+      else
+	emit_insn (gen_x86_shift_adj_2 (DImode, high[0], low[0], operands[2]));
+    }
+}
+
+/* Split a TImode ashr/lshr, selected by CODE, for APX NDD (dest != src).  */
+void
+ix86_split_rshift_ndd (enum rtx_code code, rtx *operands, rtx scratch)
+{
+  gcc_assert (TARGET_APX_NDD);
+  int half_width = GET_MODE_BITSIZE (TImode) >> 1;
+  bool ashr_p = code == ASHIFTRT;
+  rtx (*gen_shr)(rtx, rtx, rtx) = ashr_p ? gen_ashrdi3
+					 : gen_lshrdi3;
+
+  rtx low[2], high[2];
+  int count;
+  /* low[i]/high[i] are the DImode halves of operands[i], i = 0, 1.  */
+  split_double_mode (TImode, operands, 2, low, high);
+  if (CONST_INT_P (operands[2]))
+    {
+      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (TImode) - 1);
+
+      if (ashr_p && (count == GET_MODE_BITSIZE (TImode) - 1))
+	{
+	  emit_insn (gen_shr (high[0], high[1],
+			      GEN_INT (half_width - 1)));
+	  emit_move_insn (low[0], high[0]);
+	}
+      else if (count >= half_width)
+	{
+	  if (ashr_p)
+	    emit_insn (gen_shr (high[0], high[1],
+				GEN_INT (half_width - 1)));
+	  else
+	    ix86_expand_clear (high[0]);
+
+	  if (count > half_width)
+	    emit_insn (gen_shr (low[0], high[1],
+				GEN_INT (count - half_width)));
+	  else
+	    emit_move_insn (low[0], high[1]);
+	}
+      else
+	{
+	  emit_insn (gen_x86_64_shrd_ndd (low[0], low[1], high[1],
+					  GEN_INT (count)));
+	  emit_insn (gen_shr (high[0], high[1], GEN_INT (count)));
+	}
+    }
+  else
+    {
+      emit_insn (gen_x86_64_shrd_ndd (low[0], low[1], high[1],
+				      operands[2]));
+      emit_insn (gen_shr (high[0], high[1], operands[2]));
+
+      if (TARGET_CMOVE && scratch)
+	{
+	  if (ashr_p)
+	    {
+	      emit_move_insn (scratch, high[0]);
+	      emit_insn (gen_shr (scratch, scratch,
+				  GEN_INT (half_width - 1)));
+	    }
+	  else
+	    ix86_expand_clear (scratch);
+
+	  emit_insn (gen_x86_shift_adj_1
+		     (DImode, low[0], high[0], operands[2], scratch));
+	}
+      else if (ashr_p)
+	emit_insn (gen_x86_shift_adj_3
+		   (DImode, low[0], high[0], operands[2]));
+      else
+	emit_insn (gen_x86_shift_adj_2
+		   (DImode, low[0], high[0], operands[2]));
+    }
+}
+
 /* Expand move of V1TI mode register X to a new TI mode register.  */
 static rtx
 ix86_expand_v1ti_to_ti (rtx x)
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index fa952409729..56349064a6c 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -174,8 +174,10 @@ extern void x86_initialize_trampoline (rtx, rtx, rtx);
 extern rtx ix86_zero_extend_to_Pmode (rtx);
 extern void ix86_split_long_move (rtx[]);
 extern void ix86_split_ashl (rtx *, rtx, machine_mode);
+extern void ix86_split_ashl_ndd (rtx *, rtx);
 extern void ix86_split_ashr (rtx *, rtx, machine_mode);
 extern void ix86_split_lshr (rtx *, rtx, machine_mode);
+extern void ix86_split_rshift_ndd (enum rtx_code, rtx *, rtx);
 extern void ix86_expand_v1ti_shift (enum rtx_code, rtx[]);
 extern void ix86_expand_v1ti_rotate (enum rtx_code, rtx[]);
 extern void ix86_expand_v1ti_ashiftrt (rtx[]);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 017ab720293..b4db50f61cd 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14425,13 +14425,14 @@
 })
 
 (define_insn "ashl<mode>3_doubleword"
-  [(set (match_operand:DWI 0 "register_operand" "=&r")
-	(ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
-		    (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+  [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
+	(ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n,r")
+		    (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
    (clobber (reg:CC FLAGS_REG))]
   ""
   "#"
-  [(set_attr "type" "multi")])
+  [(set_attr "type" "multi")
+   (set_attr "isa" "*,apx_ndd")])
 
 (define_split
   [(set (match_operand:DWI 0 "register_operand")
@@ -14440,7 +14441,15 @@
    (clobber (reg:CC FLAGS_REG))]
   "epilogue_completed"
   [(const_int 0)]
-  "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")
+{
+  if (TARGET_APX_NDD
+      && !rtx_equal_p (operands[0], operands[1])
+      && REG_P (operands[1]))
+    ix86_split_ashl_ndd (operands, NULL_RTX);
+  else
+    ix86_split_ashl (operands, NULL_RTX, <MODE>mode);
+  DONE;
+})
 
 ;; By default we don't ask for a scratch register, because when DWImode
 ;; values are manipulated, registers are already at a premium.  But if
@@ -14456,7 +14465,15 @@
    (match_dup 3)]
   "TARGET_CMOVE"
   [(const_int 0)]
-  "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
+{
+  if (TARGET_APX_NDD
+      && !rtx_equal_p (operands[0], operands[1])
+      && (REG_P (operands[1])))
+    ix86_split_ashl_ndd (operands, operands[3]);
+  else
+    ix86_split_ashl (operands, operands[3], <DWI>mode);
+  DONE;
+})
 
 (define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
   [(set (match_operand:<DWI> 0 "register_operand" "=r")
@@ -15713,16 +15730,24 @@
 })
 
 (define_insn_and_split "<insn><mode>3_doubleword"
-  [(set (match_operand:DWI 0 "register_operand" "=&r")
-	(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+  [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
+	(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0,r")
+			 (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
    (clobber (reg:CC FLAGS_REG))]
   ""
   "#"
   "epilogue_completed"
   [(const_int 0)]
-  "ix86_split_<insn> (operands, NULL_RTX, <MODE>mode); DONE;"
-  [(set_attr "type" "multi")])
+{
+  if (TARGET_APX_NDD
+      && !rtx_equal_p (operands[0], operands[1]))
+    ix86_split_rshift_ndd (<CODE>, operands, NULL_RTX);
+  else
+    ix86_split_<insn> (operands, NULL_RTX, <MODE>mode);
+  DONE;
+}
+  [(set_attr "type" "multi")
+   (set_attr "isa" "*,apx_ndd")])
 
 ;; By default we don't ask for a scratch register, because when DWImode
 ;; values are manipulated, registers are already at a premium.  But if
@@ -15738,7 +15763,14 @@
    (match_dup 3)]
   "TARGET_CMOVE"
   [(const_int 0)]
-  "ix86_split_<insn> (operands, operands[3], <DWI>mode); DONE;")
+{
+  if (TARGET_APX_NDD
+      && !rtx_equal_p (operands[0], operands[1]))
+    ix86_split_rshift_ndd (<CODE>, operands, operands[3]);
+  else
+    ix86_split_<insn> (operands, operands[3], <DWI>mode);
+  DONE;
+})
 
 ;; Split truncations of double word right shifts into x86_shrd_1.
 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-ti-shift.c b/gcc/testsuite/gcc.target/i386/apx-ndd-ti-shift.c
new file mode 100644
index 00000000000..0489712b7f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd-ti-shift.c
@@ -0,0 +1,91 @@
+/* { dg-do run { target { int128 && { ! ia32 } } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-O2" } */
+
+#include <stdlib.h>
+
+#define APX_TARGET __attribute__((noinline, target("apxf")))
+#define NO_APX __attribute__((noinline, target("no-apxf")))
+typedef __uint128_t u128;
+typedef __int128 i128;
+
+/* Define APX and non-APX copies of a variable-count shift of TYPE.  */
+#define TI_SHIFT_FUNC(TYPE, op, name) \
+APX_TARGET TYPE apx_##name##TYPE (TYPE a, char b) \
+{ \
+  return a op b; \
+} \
+NO_APX TYPE noapx_##name##TYPE (TYPE a, char b) \
+{ \
+  return a op b; \
+}
+
+/* Define apx_<name><TYPE>_const and noapx_<name><TYPE>_const; both return
+   A OP I and differ only in their target attribute.  */
+#define TI_SHIFT_FUNC_CONST(TYPE, i, op, name) \
+APX_TARGET TYPE apx_##name##TYPE##_const (TYPE a) \
+{ \
+  return a op i; \
+} \
+NO_APX TYPE noapx_##name##TYPE##_const (TYPE a) \
+{ \
+  return a op i; \
+}
+
+/* Abort if the two variants disagree on VAL shifted by the caller's B.  */
+#define TI_SHIFT_TEST(TYPE, name, val) \
+{ \
+  if (apx_##name##TYPE (val, b) != noapx_##name##TYPE (val, b)) abort (); \
+}
+
+/* Abort if, for any of the constant-count variants <name>1 .. <name>4,
+   the APX copy and the non-APX copy return different results for VAL.
+   The pairs are compared in order, stopping at the first mismatch.  */
+#define TI_SHIFT_CONST_TEST(TYPE, name, val) \
+{ \
+  if ((apx_##name##1##TYPE##_const (val) \
+       != noapx_##name##1##TYPE##_const (val)) \
+      || (apx_##name##2##TYPE##_const (val) \
+          != noapx_##name##2##TYPE##_const (val)) \
+      || (apx_##name##3##TYPE##_const (val) \
+          != noapx_##name##3##TYPE##_const (val)) \
+      || (apx_##name##4##TYPE##_const (val) \
+          != noapx_##name##4##TYPE##_const (val))) \
+    abort (); \
+}
+
+TI_SHIFT_FUNC(i128, <<, ashl)
+TI_SHIFT_FUNC(i128, >>, ashr)
+TI_SHIFT_FUNC(u128, >>, lshr)
+/* Constant shift counts exercised: 1, 27, 64, 65, 87 and 127.  */
+TI_SHIFT_FUNC_CONST(i128, 1, <<, ashl1)
+TI_SHIFT_FUNC_CONST(i128, 65, <<, ashl2)
+TI_SHIFT_FUNC_CONST(i128, 64, <<, ashl3)
+TI_SHIFT_FUNC_CONST(i128, 87, <<, ashl4)
+TI_SHIFT_FUNC_CONST(i128, 127, >>, ashr1)
+TI_SHIFT_FUNC_CONST(i128, 87, >>, ashr2)
+TI_SHIFT_FUNC_CONST(i128, 27, >>, ashr3)
+TI_SHIFT_FUNC_CONST(i128, 64, >>, ashr4)
+TI_SHIFT_FUNC_CONST(u128, 127, >>, lshr1)
+TI_SHIFT_FUNC_CONST(u128, 87, >>, lshr2)
+TI_SHIFT_FUNC_CONST(u128, 27, >>, lshr3)
+TI_SHIFT_FUNC_CONST(u128, 64, >>, lshr4)
+
+int main (void)
+{
+  if (!__builtin_cpu_supports ("apxf"))
+    return 0;
+
+  /* Fill both 64-bit halves; ival is negative so ashr must sign-fill.  */
+  u128 uval = ((u128) 0xF234567887654321ULL << 64) | 0x123456788765432FULL;
+  i128 ival = (i128) uval;
+  char b = 28;
+
+  TI_SHIFT_TEST(i128, ashl, ival)
+  TI_SHIFT_TEST(i128, ashr, ival)
+  TI_SHIFT_TEST(u128, lshr, uval)
+  TI_SHIFT_CONST_TEST(i128, ashl, ival)
+  TI_SHIFT_CONST_TEST(i128, ashr, ival)
+  TI_SHIFT_CONST_TEST(u128, lshr, uval)
+  return 0;
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-12-07  1:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-07  1:40 [gcc r14-6255] [APX NDD] Support TImode shift for NDD Hongyu Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).