public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass
       [not found]   ` <20240618063908.3056687-2-lingling.kong@intel.com>
@ 2024-06-18  7:40     ` Kong, Lingling
  2024-06-25  6:45       ` Kong, Lingling
  0 siblings, 1 reply; 3+ messages in thread
From: Kong, Lingling @ 2024-06-18  7:40 UTC (permalink / raw)
  To: gcc-patches
  Cc: Alexander Monakov, Uros Bizjak, lingling.kong7, Hongtao Liu,
	Jeff Law, Richard Biener

[-- Attachment #1: Type: text/plain, Size: 11997 bytes --]

APX CFCMOV feature implements conditionally faulting which means

that all memory faults are suppressed when the condition code

evaluates to false and load or store a memory operand. Now we

could load or store a memory operand may trap or fault for

conditional move.



In middle-end, now we don't support a conditional move if we knew

that a load from A or B could trap or fault. To enable CFCMOV, we

added a new optab.



Conditional move suppress_fault for condition mem store would not

move any arithmetic calculations. For condition mem load now just

support a conditional move one trap mem and one no trap and no mem

cases.



gcc/ChangeLog:



               * ifcvt.cc (noce_try_cmove_load_mem_notrap): Allow convert

               to cfcmov for conditional load.

               (noce_try_cmove_store_mem_notrap): Convert to conditional store.

               (noce_process_if_block): Ditto.

               * optabs.def (OPTAB_D): New optab.

---

gcc/ifcvt.cc   | 246 ++++++++++++++++++++++++++++++++++++++++++++++++-

gcc/optabs.def |   1 +

2 files changed, 246 insertions(+), 1 deletion(-)



diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc

index 58ed42673e5..65c069b8cc6 100644

--- a/gcc/ifcvt.cc

+++ b/gcc/ifcvt.cc

@@ -783,6 +783,8 @@ static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,

                                                 rtx, rtx, rtx, rtx = NULL, rtx = NULL);

static bool noce_try_cmove (struct noce_if_info *);

static bool noce_try_cmove_arith (struct noce_if_info *);

+static bool noce_try_cmove_load_mem_notrap (struct noce_if_info *);

+static bool noce_try_cmove_store_mem_notrap (struct noce_if_info *, rtx *, rtx);

static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);

static bool noce_try_minmax (struct noce_if_info *);

static bool noce_try_abs (struct noce_if_info *);

@@ -2401,6 +2403,233 @@ noce_try_cmove_arith (struct noce_if_info *if_info)

   return false;

}



+/* When target support suppress memory fault, try more complex cases involving

+   conditional_move's source or dest may trap or fault.  */

+

+static bool

+noce_try_cmove_load_mem_notrap (struct noce_if_info *if_info)

+{

+  rtx a = if_info->a;

+  rtx b = if_info->b;

+  rtx x = if_info->x;

+

+  if (MEM_P (x))

+    return false;

+  /* Just handle a conditional move from one trap MEM + other non_trap,

+     non mem cases.  */

+  if (!(MEM_P (a) ^ MEM_P (b)))

+      return false;

+  bool a_trap = may_trap_or_fault_p (a);

+  bool b_trap = may_trap_or_fault_p (b);

+

+  if (!(a_trap ^ b_trap))

+    return false;

+  if (a_trap && !MEM_P (a))

+    return false;

+  if (b_trap && !MEM_P (b))

+    return false;

+

+  rtx orig_b;

+  rtx_insn *insn_a, *insn_b;

+  bool a_simple = if_info->then_simple;

+  bool b_simple = if_info->else_simple;

+  basic_block then_bb = if_info->then_bb;

+  basic_block else_bb = if_info->else_bb;

+  rtx target;

+  enum rtx_code code;

+  rtx cond = if_info->cond;

+  rtx_insn *ifcvt_seq;

+

+  /* if (test) x = *a; else x = c - d;

+     => x = c - d;

+             if (test)

+               x = *a;

+  */

+

+  code = GET_CODE (cond);

+  insn_a = if_info->insn_a;

+  insn_b = if_info->insn_b;

+  machine_mode x_mode = GET_MODE (x);

+

+  /* Because we only handle one trap MEM + other non_trap, non mem cases,

+     just move one trap MEM always in then_bb.  */

+  if (noce_reversed_cond_code (if_info) != UNKNOWN)

+    {

+      bool reversep = false;

+      if (b_trap)

+             reversep = true;

+

+      if (reversep)

+             {

+               if (if_info->rev_cond)

+                 {

+                   cond = if_info->rev_cond;

+                   code = GET_CODE (cond);

+                 }

+               else

+                 code = reversed_comparison_code (cond, if_info->jump);

+               std::swap (a, b);

+               std::swap (insn_a, insn_b);

+               std::swap (a_simple, b_simple);

+               std::swap (then_bb, else_bb);

+             }

+    }

+

+  if (then_bb && else_bb

+      && (!bbs_ok_for_cmove_arith (then_bb, else_bb,  if_info->orig_x)

+               || !bbs_ok_for_cmove_arith (else_bb, then_bb,  if_info->orig_x)))

+    return false;

+

+  start_sequence ();

+

+  /* If one of the blocks is empty then the corresponding B or A value

+     came from the test block.  The non-empty complex block that we will

+     emit might clobber the register used by B or A, so move it to a pseudo

+     first.  */

+

+  rtx tmp_b = NULL_RTX;

+

+  /* Don't move trap mem to a pseudo. */

+  if (!may_trap_or_fault_p (b) && (b_simple || !else_bb))

+    tmp_b = gen_reg_rtx (x_mode);

+

+  orig_b = b;

+

+  rtx emit_a = NULL_RTX;

+  rtx emit_b = NULL_RTX;

+  rtx_insn *tmp_insn = NULL;

+  bool modified_in_a = false;

+  bool modified_in_b = false;

+  /* If either operand is complex, load it into a register first.

+     The best way to do this is to copy the original insn.  In this

+     way we preserve any clobbers etc that the insn may have had.

+     This is of course not possible in the IS_MEM case.  */

+

+  if (! general_operand (b, GET_MODE (b)) || tmp_b)

+    {

+               if (insn_b)

+                 {

+                   b = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));

+                   rtx_insn *copy_of_b = as_a <rtx_insn *> (copy_rtx (insn_b));

+                   rtx set = single_set (copy_of_b);

+

+                   SET_DEST (set) = b;

+                   emit_b = PATTERN (copy_of_b);

+                 }

+               else

+                 {

+                   rtx tmp_reg = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));

+                   emit_b = gen_rtx_SET (tmp_reg, b);

+                   b = tmp_reg;

+                 }

+    }

+

+  if (tmp_b && then_bb)

+    {

+      FOR_BB_INSNS (then_bb, tmp_insn)

+             /* Don't check inside insn_a.  We will have changed it to emit_a

+                with a destination that doesn't conflict.  */

+             if (!(insn_a && tmp_insn == insn_a)

+                 && modified_in_p (orig_b, tmp_insn))

+               {

+                 modified_in_a = true;

+                 break;

+               }

+

+    }

+

+  modified_in_b = emit_b != NULL_RTX && modified_in_p (a, emit_b);

+  /* If insn to set up A clobbers any registers B depends on, try to

+     swap insn that sets up A with the one that sets up B.  If even

+     that doesn't help, punt.  */

+  if (modified_in_a && !modified_in_b)

+    {

+      if (!noce_emit_bb (emit_b, else_bb, b_simple))

+             goto end_seq_and_fail;

+

+      if (!noce_emit_bb (emit_a, then_bb, a_simple))

+             goto end_seq_and_fail;

+    }

+  else if (!modified_in_a)

+    {

+      if (!noce_emit_bb (emit_b, else_bb, b_simple))

+             goto end_seq_and_fail;

+

+      if (!noce_emit_bb (emit_a, then_bb, a_simple))

+             goto end_seq_and_fail;

+    }

+  else

+    goto end_seq_and_fail;

+

+  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),

+                                               a, b);

+

+  if (! target)

+    goto end_seq_and_fail;

+

+  if (target != x)

+    noce_emit_move_insn (x, target);

+

+  ifcvt_seq = end_ifcvt_sequence (if_info);

+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))

+    return false;

+

+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,

+                                              INSN_LOCATION (if_info->insn_a));

+  if_info->transform_name = "noce_try_cmove_load_mem_notrap";

+  return true;

+

+ end_seq_and_fail:

+  end_sequence ();

+  return false;

+}

+

+static bool

+noce_try_cmove_store_mem_notrap (struct noce_if_info *if_info, rtx *x_ptr, rtx orig_x)

+{

+  rtx a = if_info->a;

+  rtx b = if_info->b;

+  rtx x = orig_x;

+  machine_mode x_mode = GET_MODE (x);

+

+  if (!MEM_P (x) || !rtx_equal_p (x, b))

+    return false;

+  if (!may_trap_or_fault_p (x))

+    return false;

+  if (!if_info->then_simple || !register_operand (a, x_mode))

+    return false;

+

+  rtx cond = if_info->cond;

+  enum rtx_code code = GET_CODE (cond);

+  rtx_insn *ifcvt_seq;

+

+  start_sequence ();

+

+  rtx target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),

+                                               a, b);

+

+  if (! target)

+    goto end_seq_and_fail;

+

+  if (target != x)

+    noce_emit_move_insn (x, target);

+

+  ifcvt_seq = end_ifcvt_sequence (if_info);

+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))

+    return false;

+

+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,

+                                              INSN_LOCATION (if_info->insn_a));

+  if_info->transform_name = "noce_try_cmove_load_mem_notrap";

+  if_info->x = orig_x;

+  *x_ptr = orig_x;

+  return true;

+

+ end_seq_and_fail:

+  end_sequence ();

+  return false;

+}

+

/* For most cases, the simplified condition we found is the best

    choice, but this is not the case for the min/max/abs transforms.

    For these we wish to know that it is A or B in the condition.  */

@@ -4055,6 +4284,8 @@ noce_process_if_block (struct noce_if_info *if_info)

      the lifetime of hard registers on small register class machines.  */

   orig_x = x;

   if_info->orig_x = orig_x;

+  bool have_cfmovcc = (optab_handler (cfmovcc_optab, GET_MODE (orig_x))

+                                   != CODE_FOR_nothing);

   if (!REG_P (x)

       || (HARD_REGISTER_P (x)

                 && targetm.small_register_classes_for_mode_p (GET_MODE (x))))

@@ -4121,12 +4352,21 @@ noce_process_if_block (struct noce_if_info *if_info)

     }



   if (!set_b && MEM_P (orig_x))

+    {

+      /* Conditional_move_suppress_fault for condition mem store would not

+             move any arithmetic calculations.  */

+      if (have_cfmovcc

+               && HAVE_conditional_move

+               && noce_try_cmove_store_mem_notrap (if_info, &x, orig_x))

+             goto success;

+      else

     /* We want to avoid store speculation to avoid cases like

               if (pthread_mutex_trylock(mutex))

                  ++global_variable;

        Rather than go to much effort here, we rely on the SSA optimizers,

        which do a good enough job these days.  */

-    return false;

+       return false;

+    }



   if (noce_try_move (if_info))

     goto success;

@@ -4160,6 +4400,10 @@ noce_process_if_block (struct noce_if_info *if_info)

       if (HAVE_conditional_move

                 && noce_try_cmove_arith (if_info))

               goto success;

+      if (HAVE_conditional_move

+               && have_cfmovcc

+               && noce_try_cmove_load_mem_notrap (if_info))

+             goto success;

       if (noce_try_sign_mask (if_info))

               goto success;

     }

diff --git a/gcc/optabs.def b/gcc/optabs.def

index bc2611abdc2..49335ec3212 100644

--- a/gcc/optabs.def

+++ b/gcc/optabs.def

@@ -540,3 +540,4 @@ OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")

OPTAB_D (len_load_optab, "len_load_$a")

OPTAB_D (len_store_optab, "len_store_$a")

OPTAB_D (select_vl_optab, "select_vl$a")

+OPTAB_D (cfmovcc_optab, "cfmov$acc")

--

2.31.1



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2 2/2] [APX CFCMOV] Support APX CFCMOV in backend
       [not found]   ` <20240618063908.3056687-3-lingling.kong@intel.com>
@ 2024-06-18  7:40     ` Kong, Lingling
  0 siblings, 0 replies; 3+ messages in thread
From: Kong, Lingling @ 2024-06-18  7:40 UTC (permalink / raw)
  To: gcc-patches
  Cc: Alexander Monakov, Uros Bizjak, lingling.kong7, Hongtao Liu,
	Jeff Law, Richard Biener

gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_can_cfcmov_p): New function that
	test if the cfcmov can be generated.
	(ix86_expand_int_movcc): Expand to cfcmov pattern if ix86_can_cfcmov_p
	return ture.
	* config/i386/i386-opts.h (enum apx_features): Add apx_cfcmov.
	* config/i386/i386.cc (ix86_have_conditional_move_mem_notrap): New
	function to hook TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP
	(TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP): Target hook define.
	(ix86_rtx_costs): Add UNSPEC_APX_CFCMOV cost;
	* config/i386/i386.h (TARGET_APX_CFCMOV): Define.
	* config/i386/i386.md (cfmov<mode>cc): New define_insn to support
	cfcmov.
	(cfmov<mode>cc_2): Ditto.
	(UNSPEC_APX_CFCMOV): New unspec for cfcmov.
	* config/i386/i386.opt: Add enum value for cfcmov.
	* config/i386/predicates.md (register_or_cfc_mem_operand): New
	define_predicate.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-cfcmov-1.c: New test.
	* gcc.target/i386/apx-cfcmov-2.c: Ditto.
---
 gcc/config/i386/i386-expand.cc               | 63 +++++++++++++++++
 gcc/config/i386/i386-opts.h                  |  4 +-
 gcc/config/i386/i386.cc                      | 16 +++--
 gcc/config/i386/i386.h                       |  1 +
 gcc/config/i386/i386.md                      | 53 ++++++++++++--
 gcc/config/i386/i386.opt                     |  3 +
 gcc/config/i386/predicates.md                |  7 ++
 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c | 73 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c | 40 +++++++++++
 9 files changed, 248 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 312329e550b..c02a4bcbec3 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3336,6 +3336,30 @@ ix86_expand_int_addcc (rtx operands[])
   return true;
 }
 
+/* Return TRUE if we could convert "if (test) x = a; else x = b;" to cfcmov,
+   especially when load a or b or x store may cause memmory faults.  */
+bool
+ix86_can_cfcmov_p (rtx x, rtx a, rtx b)
+{
+  machine_mode mode = GET_MODE (x);
+  if (TARGET_APX_CFCMOV
+      && (mode == DImode || mode == SImode || mode == HImode))
+    {
+      /* C load (r m r), (r m C), (r r m). For r m m could use
+	 two cfcmov. */
+      if (register_operand (x, mode)
+	  && ((MEM_P (a) && register_operand (b, mode))
+	      || (MEM_P (a) && b == const0_rtx)
+	      || (register_operand (a, mode) && MEM_P (b))
+	      || (MEM_P (a) && MEM_P (b))))
+	return true;
+      /* C store  (m r 0).  */
+      else if (MEM_P (x) && x == b && register_operand (a, mode))
+	return true;
+    }
+  return false;
+}
+
 bool
 ix86_expand_int_movcc (rtx operands[])
 {
@@ -3366,6 +3390,45 @@ ix86_expand_int_movcc (rtx operands[])
 
   compare_code = GET_CODE (compare_op);
 
+  if (MEM_P (operands[0])
+      && !ix86_can_cfcmov_p (operands[0], op2, op3))
+    return false;
+
+  if (may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
+      {
+	if (ix86_can_cfcmov_p (operands[0], op2, op3))
+	  {
+	    if (may_trap_or_fault_p (op2))
+	      op2 = gen_rtx_UNSPEC (mode, gen_rtvec (1, operands[2]),
+				    UNSPEC_APX_CFCMOV);
+	    if (may_trap_or_fault_p (op3))
+	      op3 = gen_rtx_UNSPEC (mode, gen_rtvec (1, operands[3]),
+				    UNSPEC_APX_CFCMOV);
+	    emit_insn (compare_seq);
+
+	    if (may_trap_or_fault_p (op2) && may_trap_or_fault_p (op3))
+	      {
+		emit_insn (gen_rtx_SET (operands[0],
+					gen_rtx_IF_THEN_ELSE (mode,
+							      compare_op,
+							      op2,
+							      operands[0])));
+		emit_insn (gen_rtx_SET (operands[0],
+					gen_rtx_IF_THEN_ELSE (mode,
+							      compare_op,
+							      operands[0],
+							      op3)));
+	      }
+	    else
+	      emit_insn (gen_rtx_SET (operands[0],
+				      gen_rtx_IF_THEN_ELSE (mode,
+							    compare_op,
+							    op2, op3)));
+	    return true;
+	  }
+	return false;
+      }
+
   if ((op1 == const0_rtx && (code == GE || code == LT))
       || (op1 == constm1_rtx && (code == GT || code == LE)))
     sign_bit_compare_p = true;
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index c7ec0d9fd39..711519ffb53 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -143,8 +143,10 @@ enum apx_features {
   apx_nf = 1 << 4,
   apx_ccmp = 1 << 5,
   apx_zu = 1 << 6,
+  apx_cfcmov = 1 << 7,
   apx_all = apx_egpr | apx_push2pop2 | apx_ndd
-	    | apx_ppx | apx_nf | apx_ccmp | apx_zu,
+	    | apx_ppx | apx_nf | apx_ccmp | apx_zu
+	    | apx_cfcmov,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 173db213d14..4d9eb272e4a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22349,10 +22349,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 	  *total = COSTS_N_INSNS (1);
 	  if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
 	    *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-	  if (!REG_P (XEXP (x, 1)))
-	    *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
-	  if (!REG_P (XEXP (x, 2)))
-	    *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
+	  rtx op1, op2;
+	  op1 = XEXP (x, 1);
+	  op2 = XEXP (x, 2);
+	  /* Handle UNSPEC_APX_CFCMOV for cfcmov.  */
+	  if (GET_CODE (op1) == UNSPEC && XINT (op1, 1) == UNSPEC_APX_CFCMOV)
+	    op1 = XVECEXP (op1, 0, 0);
+	  if (GET_CODE (op2) == UNSPEC && XINT (op2, 1) == UNSPEC_APX_CFCMOV)
+	    op2 = XVECEXP (op2, 0, 0);
+	  if (!REG_P (op1))
+	    *total += rtx_cost (op1, mode, code, 1, speed);
+	  if (!REG_P (op2))
+	    *total += rtx_cost (op2, mode, code, 2, speed);
 	  return true;
 	}
       return false;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index dc1a1f44320..6a20fa678c8 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -58,6 +58,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_NF (ix86_apx_features & apx_nf)
 #define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
 #define TARGET_APX_ZU (ix86_apx_features & apx_zu)
+#define TARGET_APX_CFCMOV (ix86_apx_features & apx_cfcmov)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fd48e764469..c20d85703ad 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -221,6 +221,9 @@
   ;; For APX CCMP support
   ;; DFV = default flag value
   UNSPEC_APX_DFV
+
+  ;; For APX CFCMOV support
+  UNSPEC_APX_CFCMOV
 ])
 
 (define_c_enum "unspecv" [
@@ -579,7 +582,7 @@
 		    noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
 		    avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
 		    avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
-		    vaes_avx512vl,noapx_nf"
+		    vaes_avx512vl,noapx_nf,apx_cfcmov"
   (const_string "base"))
 
 ;; The (bounding maximum) length of an instruction immediate.
@@ -986,6 +989,7 @@
 	 (eq_attr "mmx_isa" "avx")
 	   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
 	 (eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF")
+	 (eq_attr "isa" "apx_cfcmov") (symbol_ref "TARGET_APX_CFCMOV")
 	]
 	(const_int 1)))
 
@@ -24995,7 +24999,7 @@
 ;; Conditional move instructions.
 
 (define_expand "mov<mode>cc"
-  [(set (match_operand:SWIM 0 "register_operand")
+  [(set (match_operand:SWIM 0 "register_or_cfc_mem_operand")
 	(if_then_else:SWIM (match_operand 1 "comparison_operator")
 			   (match_operand:SWIM 2 "<general_operand>")
 			   (match_operand:SWIM 3 "<general_operand>")))]
@@ -25103,19 +25107,54 @@
    (set (match_dup 0)
 	(neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
 
+(define_insn "cfmov<mode>cc"
+  [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                               [(reg FLAGS_REG) (const_int 0)])
+          (unspec:SWI248
+	   [(match_operand:SWI248 2 "memory_operand" "m,m")]
+	   UNSPEC_APX_CFCMOV)
+	  (match_operand:SWI248 3 "reg_or_0_operand" "C,r")))]
+  "TARGET_CMOVE && TARGET_APX_CFCMOV"
+  "@
+  cfcmov%O2%C1\t{%2, %0|%0, %2}
+  cfcmov%O2%C1\t{%2, %3, %0|%0, %3, %2}"
+  [(set_attr "isa" "*,apx_ndd")
+   (set_attr "type" "icmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "cfmov<mode>cc_2"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,m")
+        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                               [(reg FLAGS_REG) (const_int 0)])
+	  (match_operand:SWI248 2 "register_operand" "r,r")
+          (unspec:SWI248
+	   [(match_operand:SWI248 3 "memory_operand" "m,0")]
+	   UNSPEC_APX_CFCMOV)))]
+  "TARGET_CMOVE && TARGET_APX_CFCMOV"
+  "@
+  cfcmov%O2%c1\t{%3, %2, %0|%0, %2, %3}
+  cfcmov%O2%C1\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "apx_ndd,*")
+   (set_attr "type" "icmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*mov<mode>cc_noc"
-  [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
+  [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r,r")
 	(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
 			       [(reg FLAGS_REG) (const_int 0)])
-	  (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
-	  (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))]
+	  (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r,r")
+	  (match_operand:SWI248 3 "nonimm_or_0_operand" "0,rm,r,rm,C")))]
   "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
   "@
    cmov%O2%C1\t{%2, %0|%0, %2}
    cmov%O2%c1\t{%3, %0|%0, %3}
    cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
-   cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
+   cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}
+   cfcmov%O2%C1\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_cfcmov")
    (set_attr "type" "icmov")
    (set_attr "mode" "<MODE>")])
 
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 353fffb2343..7d63d9abd95 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1345,6 +1345,9 @@ Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
 EnumValue
 Enum(apx_features) String(zu) Value(apx_zu) Set(8)
 
+EnumValue
+Enum(apx_features) String(cfcmov) Value(apx_cfcmov) Set(9)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 7afe3100cb7..d562e10ab41 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2322,3 +2322,10 @@
 
   return true;
 })
+
+;; Return true if OP is a register operand or memory_operand is only
+;; supported under TARGET_APX_CFCMOV.
+(define_predicate "register_or_cfc_mem_operand"
+  (ior (match_operand 0 "register_operand")
+       (and (match_code "mem")
+	    (match_test "TARGET_APX_CFCMOV"))))
diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
new file mode 100644
index 00000000000..4a1fb91b24c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
@@ -0,0 +1,73 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -mapxf" } */
+
+/* { dg-final { scan-assembler-times "cfcmovne" 1 } } */
+/* { dg-final { scan-assembler-times "cfcmovg" 2} } */
+/* { dg-final { scan-assembler-times "cfcmove" 1 } } */
+/* { dg-final { scan-assembler-times "cfcmovl" 2 } } */
+/* { dg-final { scan-assembler-times "cfcmovle" 1 } } */
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_store (int a, int b, int c, int d, int *arr)
+{
+    if (a != b)
+        *arr = c;
+    return d;
+
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_ndd (int a, int b, int c, int *p)
+{
+  if (a > b)
+    return *p;
+  return c;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_2_trap (int a, int b, int *c, int *p)
+{
+  if (a > b)
+    return *p;
+  return *c;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_zero (int a, int b, int c)
+{
+  int sum = 0;
+  if (a == b)
+    return c;
+  return sum;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_mem (int a, int b, int *p)
+{
+    int sum = 0;
+    if (a < b )
+	sum = *p;
+    return sum;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_arith_1 (int a, int b, int c, int *p)
+{
+  int sum = 0;
+  if (a > b)
+    sum = *p;
+  else
+    sum = a + c;
+  return sum + 1;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_arith_2 (int a, int b, int c, int *p)
+{
+  int sum = 0;
+  if (a > b)
+    sum = a + c;
+  else
+    sum = *p;
+  return sum + 1;
+}
diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
new file mode 100644
index 00000000000..2b1660f64fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-mapxf -march=x86-64 -O3" } */
+
+#include "apx-cfcmov-1.c"
+
+extern void abort (void);
+
+int main ()
+{
+  if (!__builtin_cpu_supports ("apxf"))
+    return 0;
+
+  int arr = 6;
+  int arr1 = 5;
+  int res = cfc_store (1, 2, 3, 4, &arr);
+  if (arr != 3 && res != 4)
+    abort ();
+  res = cfc_load_ndd (2, 1, 2, &arr);
+  if (res != 3)
+    abort ();
+  res = cfc_load_2_trap (1, 2, &arr1, &arr);
+  if (res != 5)
+    abort ();
+  res = cfc_load_zero (1, 2, 3);
+  res = cfc_load_zero (1, 2, 3);
+  if (res != 0)
+    abort ();
+  res = cfc_load_mem (2, 1, &arr);
+  if (res != 0)
+    abort ();
+  res = cfc_load_arith_1 (1, 2, 3, &arr);
+  if (res != 5)
+    abort();
+  res = cfc_load_arith_2 (2, 1, 3,&arr);
+  if (res != 6)
+    abort();
+  return 0;
+}
+
-- 
2.31.1

 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass
  2024-06-18  7:40     ` [PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass Kong, Lingling
@ 2024-06-25  6:45       ` Kong, Lingling
  0 siblings, 0 replies; 3+ messages in thread
From: Kong, Lingling @ 2024-06-25  6:45 UTC (permalink / raw)
  To: gcc-patches
  Cc: Alexander Monakov, Uros Bizjak, lingling.kong7, Hongtao Liu,
	Jeff Law, Richard Biener

[-- Attachment #1: Type: text/plain, Size: 12541 bytes --]

Hi,


Gently ping for this.

This version has removed the target hook and added a new optab for cfcmov.

Thanks,
Lingling

From: Kong, Lingling <lingling.kong@intel.com>
Sent: Tuesday, June 18, 2024 3:41 PM
To: gcc-patches@gcc.gnu.org
Cc: Alexander Monakov <amonakov@ispras.ru>; Uros Bizjak <ubizjak@gmail.com>; lingling.kong7@gmail.com; Hongtao Liu <crazylht@gmail.com>; Jeff Law <jeffreyalaw@gmail.com>; Richard Biener <richard.guenther@gmail.com>
Subject: [PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass


APX CFCMOV feature implements conditionally faulting which means

that all memory faults are suppressed when the condition code

evaluates to false and load or store a memory operand. Now we

could load or store a memory operand may trap or fault for

conditional move.



In middle-end, now we don't support a conditional move if we knew

that a load from A or B could trap or fault. To enable CFCMOV, we

added a new optab.



Conditional move suppress_fault for condition mem store would not

move any arithmetic calculations. For condition mem load now just

support a conditional move one trap mem and one no trap and no mem

cases.



gcc/ChangeLog:



               * ifcvt.cc (noce_try_cmove_load_mem_notrap): Allow convert

               to cfcmov for conditional load.

               (noce_try_cmove_store_mem_notrap): Convert to conditional store.

               (noce_process_if_block): Ditto.

               * optabs.def (OPTAB_D): New optab.

---

gcc/ifcvt.cc   | 246 ++++++++++++++++++++++++++++++++++++++++++++++++-

gcc/optabs.def |   1 +

2 files changed, 246 insertions(+), 1 deletion(-)



diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc

index 58ed42673e5..65c069b8cc6 100644

--- a/gcc/ifcvt.cc

+++ b/gcc/ifcvt.cc

@@ -783,6 +783,8 @@ static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,

                                                 rtx, rtx, rtx, rtx = NULL, rtx = NULL);

static bool noce_try_cmove (struct noce_if_info *);

static bool noce_try_cmove_arith (struct noce_if_info *);

+static bool noce_try_cmove_load_mem_notrap (struct noce_if_info *);

+static bool noce_try_cmove_store_mem_notrap (struct noce_if_info *, rtx *, rtx);

static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);

static bool noce_try_minmax (struct noce_if_info *);

static bool noce_try_abs (struct noce_if_info *);

@@ -2401,6 +2403,233 @@ noce_try_cmove_arith (struct noce_if_info *if_info)

   return false;

}



+/* When target support suppress memory fault, try more complex cases involving

+   conditional_move's source or dest may trap or fault.  */

+

+static bool

+noce_try_cmove_load_mem_notrap (struct noce_if_info *if_info)

+{

+  rtx a = if_info->a;

+  rtx b = if_info->b;

+  rtx x = if_info->x;

+

+  if (MEM_P (x))

+    return false;

+  /* Just handle a conditional move from one trap MEM + other non_trap,

+     non mem cases.  */

+  if (!(MEM_P (a) ^ MEM_P (b)))

+      return false;

+  bool a_trap = may_trap_or_fault_p (a);

+  bool b_trap = may_trap_or_fault_p (b);

+

+  if (!(a_trap ^ b_trap))

+    return false;

+  if (a_trap && !MEM_P (a))

+    return false;

+  if (b_trap && !MEM_P (b))

+    return false;

+

+  rtx orig_b;

+  rtx_insn *insn_a, *insn_b;

+  bool a_simple = if_info->then_simple;

+  bool b_simple = if_info->else_simple;

+  basic_block then_bb = if_info->then_bb;

+  basic_block else_bb = if_info->else_bb;

+  rtx target;

+  enum rtx_code code;

+  rtx cond = if_info->cond;

+  rtx_insn *ifcvt_seq;

+

+  /* if (test) x = *a; else x = c - d;

+     => x = c - d;

+             if (test)

+               x = *a;

+  */

+

+  code = GET_CODE (cond);

+  insn_a = if_info->insn_a;

+  insn_b = if_info->insn_b;

+  machine_mode x_mode = GET_MODE (x);

+

+  /* Because we only handle one trap MEM + other non_trap, non mem cases,

+     just move one trap MEM always in then_bb.  */

+  if (noce_reversed_cond_code (if_info) != UNKNOWN)

+    {

+      bool reversep = false;

+      if (b_trap)

+             reversep = true;

+

+      if (reversep)

+             {

+               if (if_info->rev_cond)

+                 {

+                   cond = if_info->rev_cond;

+                   code = GET_CODE (cond);

+                 }

+               else

+                 code = reversed_comparison_code (cond, if_info->jump);

+               std::swap (a, b);

+               std::swap (insn_a, insn_b);

+               std::swap (a_simple, b_simple);

+               std::swap (then_bb, else_bb);

+             }

+    }

+

+  if (then_bb && else_bb

+      && (!bbs_ok_for_cmove_arith (then_bb, else_bb,  if_info->orig_x)

+               || !bbs_ok_for_cmove_arith (else_bb, then_bb,  if_info->orig_x)))

+    return false;

+

+  start_sequence ();

+

+  /* If one of the blocks is empty then the corresponding B or A value

+     came from the test block.  The non-empty complex block that we will

+     emit might clobber the register used by B or A, so move it to a pseudo

+     first.  */

+

+  rtx tmp_b = NULL_RTX;

+

+  /* Don't move trap mem to a pseudo. */

+  if (!may_trap_or_fault_p (b) && (b_simple || !else_bb))

+    tmp_b = gen_reg_rtx (x_mode);

+

+  orig_b = b;

+

+  rtx emit_a = NULL_RTX;

+  rtx emit_b = NULL_RTX;

+  rtx_insn *tmp_insn = NULL;

+  bool modified_in_a = false;

+  bool modified_in_b = false;

+  /* If either operand is complex, load it into a register first.

+     The best way to do this is to copy the original insn.  In this

+     way we preserve any clobbers etc that the insn may have had.

+     This is of course not possible in the IS_MEM case.  */

+

+  if (! general_operand (b, GET_MODE (b)) || tmp_b)

+    {

+               if (insn_b)

+                 {

+                   b = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));

+                   rtx_insn *copy_of_b = as_a <rtx_insn *> (copy_rtx (insn_b));

+                   rtx set = single_set (copy_of_b);

+

+                   SET_DEST (set) = b;

+                   emit_b = PATTERN (copy_of_b);

+                 }

+               else

+                 {

+                   rtx tmp_reg = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));

+                   emit_b = gen_rtx_SET (tmp_reg, b);

+                   b = tmp_reg;

+                 }

+    }

+

+  if (tmp_b && then_bb)

+    {

+      FOR_BB_INSNS (then_bb, tmp_insn)

+             /* Don't check inside insn_a.  We will have changed it to emit_a

+                with a destination that doesn't conflict.  */

+             if (!(insn_a && tmp_insn == insn_a)

+                 && modified_in_p (orig_b, tmp_insn))

+               {

+                 modified_in_a = true;

+                 break;

+               }

+

+    }

+

+  modified_in_b = emit_b != NULL_RTX && modified_in_p (a, emit_b);

+  /* If insn to set up A clobbers any registers B depends on, try to

+     swap insn that sets up A with the one that sets up B.  If even

+     that doesn't help, punt.  */

+  if (modified_in_a && !modified_in_b)

+    {

+      if (!noce_emit_bb (emit_b, else_bb, b_simple))

+             goto end_seq_and_fail;

+

+      if (!noce_emit_bb (emit_a, then_bb, a_simple))

+             goto end_seq_and_fail;

+    }

+  else if (!modified_in_a)

+    {

+      if (!noce_emit_bb (emit_b, else_bb, b_simple))

+             goto end_seq_and_fail;

+

+      if (!noce_emit_bb (emit_a, then_bb, a_simple))

+             goto end_seq_and_fail;

+    }

+  else

+    goto end_seq_and_fail;

+

+  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),

+                                               a, b);

+

+  if (! target)

+    goto end_seq_and_fail;

+

+  if (target != x)

+    noce_emit_move_insn (x, target);

+

+  ifcvt_seq = end_ifcvt_sequence (if_info);

+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))

+    return false;

+

+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,

+                                              INSN_LOCATION (if_info->insn_a));

+  if_info->transform_name = "noce_try_cmove_load_mem_notrap";

+  return true;

+

+ end_seq_and_fail:

+  end_sequence ();

+  return false;

+}

+

+static bool

+noce_try_cmove_store_mem_notrap (struct noce_if_info *if_info, rtx *x_ptr, rtx orig_x)

+{

+  rtx a = if_info->a;

+  rtx b = if_info->b;

+  rtx x = orig_x;

+  machine_mode x_mode = GET_MODE (x);

+

+  if (!MEM_P (x) || !rtx_equal_p (x, b))

+    return false;

+  if (!may_trap_or_fault_p (x))

+    return false;

+  if (!if_info->then_simple || !register_operand (a, x_mode))

+    return false;

+

+  rtx cond = if_info->cond;

+  enum rtx_code code = GET_CODE (cond);

+  rtx_insn *ifcvt_seq;

+

+  start_sequence ();

+

+  rtx target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),

+                                               a, b);

+

+  if (! target)

+    goto end_seq_and_fail;

+

+  if (target != x)

+    noce_emit_move_insn (x, target);

+

+  ifcvt_seq = end_ifcvt_sequence (if_info);

+  if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))

+    return false;

+

+  emit_insn_before_setloc (ifcvt_seq, if_info->jump,

+                                              INSN_LOCATION (if_info->insn_a));

+  if_info->transform_name = "noce_try_cmove_load_mem_notrap";

+  if_info->x = orig_x;

+  *x_ptr = orig_x;

+  return true;

+

+ end_seq_and_fail:

+  end_sequence ();

+  return false;

+}

+

/* For most cases, the simplified condition we found is the best

    choice, but this is not the case for the min/max/abs transforms.

    For these we wish to know that it is A or B in the condition.  */

@@ -4055,6 +4284,8 @@ noce_process_if_block (struct noce_if_info *if_info)

      the lifetime of hard registers on small register class machines.  */

   orig_x = x;

   if_info->orig_x = orig_x;

+  bool have_cfmovcc = (optab_handler (cfmovcc_optab, GET_MODE (orig_x))

+                                   != CODE_FOR_nothing);

   if (!REG_P (x)

       || (HARD_REGISTER_P (x)

                 && targetm.small_register_classes_for_mode_p (GET_MODE (x))))

@@ -4121,12 +4352,21 @@ noce_process_if_block (struct noce_if_info *if_info)

     }



   if (!set_b && MEM_P (orig_x))

+    {

+      /* Conditional_move_suppress_fault for condition mem store would not

+             move any arithmetic calculations.  */

+      if (have_cfmovcc

+               && HAVE_conditional_move

+               && noce_try_cmove_store_mem_notrap (if_info, &x, orig_x))

+             goto success;

+      else

     /* We want to avoid store speculation to avoid cases like

               if (pthread_mutex_trylock(mutex))

                  ++global_variable;

        Rather than go to much effort here, we rely on the SSA optimizers,

        which do a good enough job these days.  */

-    return false;

+       return false;

+    }



   if (noce_try_move (if_info))

     goto success;

@@ -4160,6 +4400,10 @@ noce_process_if_block (struct noce_if_info *if_info)

       if (HAVE_conditional_move

                 && noce_try_cmove_arith (if_info))

               goto success;

+      if (HAVE_conditional_move

+               && have_cfmovcc

+               && noce_try_cmove_load_mem_notrap (if_info))

+             goto success;

       if (noce_try_sign_mask (if_info))

               goto success;

     }

diff --git a/gcc/optabs.def b/gcc/optabs.def

index bc2611abdc2..49335ec3212 100644

--- a/gcc/optabs.def

+++ b/gcc/optabs.def

@@ -540,3 +540,4 @@ OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")

OPTAB_D (len_load_optab, "len_load_$a")

OPTAB_D (len_store_optab, "len_store_$a")

OPTAB_D (select_vl_optab, "select_vl$a")

+OPTAB_D (cfmovcc_optab, "cfmov$acc")

--

2.31.1



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-06-25  6:46 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <DM4PR11MB54878A1C8EC27664CA965F6EECCE2@DM4PR11MB5487.namprd11.prod.outlook.com>
     [not found] ` <20240618063908.3056687-1-lingling.kong@intel.com>
     [not found]   ` <20240618063908.3056687-2-lingling.kong@intel.com>
2024-06-18  7:40     ` [PATCH v2 1/2] [APX CFCMOV] Support APX CFCMOV in if_convert pass Kong, Lingling
2024-06-25  6:45       ` Kong, Lingling
     [not found]   ` <20240618063908.3056687-3-lingling.kong@intel.com>
2024-06-18  7:40     ` [PATCH v2 2/2] [APX CFCMOV] Support APX CFCMOV in backend Kong, Lingling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).