public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns
@ 2023-12-06  5:24 Jiufu Guo
  2023-12-06  5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Jiufu Guo @ 2023-12-06  5:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu

Hi,

Trunk GCC can now build more constants with two instructions,
e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic".
So num_insns_constant should be updated accordingly.

Function "rs6000_emit_set_long_const" is used to build complicated
constants, and "num_insns_constant_gpr" is used to compute how
many instructions are needed to build the constant.  So these
two functions should be kept aligned.

The idea of this patch is to reuse "rs6000_emit_set_long_const" to
compute/record the number of instructions (when computing the
instruction count, no instructions are emitted).

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636565.html
this version updates "rs6000_emit_set_long_const" to use a condition
to select either "computing the insn number" or "emitting the insn",
and keeps the two together to avoid misalignment in the future.

Bootstrap & regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new
	parameter to record number of instructions to build the constant.
	(num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute
	num_insn.

---
 gcc/config/rs6000/rs6000.cc | 272 ++++++++++++++++++------------------
 1 file changed, 137 insertions(+), 135 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3dfd79c4c43..dbdc72dce5d 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
-static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
+static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
@@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
 
   else if (TARGET_POWERPC64)
     {
-      HOST_WIDE_INT low = sext_hwi (value, 32);
-      HOST_WIDE_INT high = value >> 31;
-
-      if (high == 0 || high == -1)
-	return 2;
-
-      high >>= 1;
-
-      if (low == 0 || low == high)
-	return num_insns_constant_gpr (high) + 1;
-      else if (high == 0)
-	return num_insns_constant_gpr (low) + 1;
-      else
-	return (num_insns_constant_gpr (high)
-		+ num_insns_constant_gpr (low) + 1);
+      int num_insns = 0;
+      rs6000_emit_set_long_const (NULL, value, &num_insns);
+      return num_insns;
     }
 
   else
@@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
 
 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
    Output insns to set DEST equal to the constant C as a series of
-   lis, ori and shl instructions.  */
+   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
+   only increase *NUM_INSNS as the number of insns, and do not output
+   real insns.  */
 
 static void
-rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
+rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
 {
-  rtx temp;
-  int shift;
-  HOST_WIDE_INT mask;
   HOST_WIDE_INT ud1, ud2, ud3, ud4;
 
   ud1 = c & 0xffff;
@@ -10509,168 +10496,183 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
   ud3 = (c >> 32) & 0xffff;
   ud4 = (c >> 48) & 0xffff;
 
-  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
-      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
-    emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
+  /* This lambda is used to emit one insn or just increase the insn count.
+     When counting the insn number, no need to emit the insn.  Here, two
+     kinds of insns are needed: move and rldimi. */
+  auto count_or_emit_insn = [&num_insns] (rtx dest, rtx op1, rtx op2 = NULL) {
+    if (num_insns)
+      (*num_insns)++;
+    else if (!op2)
+      emit_move_insn (dest, op1);
+    else
+      emit_insn (gen_rotldi3_insert_3 (dest, op1, GEN_INT (32), op2,
+				       GEN_INT (0xffffffff)));
+  };
 
-  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
-	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
+  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
+      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
     {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      /* li */
+      count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
+      return;
+    }
+
+  rtx temp = num_insns ? nullptr
+		       : can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest;
 
-      emit_move_insn (ud1 != 0 ? temp : dest,
-		      GEN_INT (sext_hwi (ud2 << 16, 32)));
+  if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
+      || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
+    {
+      /* lis[; ori] */
+      count_or_emit_insn (ud1 != 0 ? temp : dest,
+			  GEN_INT (sext_hwi (ud2 << 16, 32)));
       if (ud1 != 0)
-	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+	count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+      return;
     }
-  else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
+
+  if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
     {
       /* lis; xoris */
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
-      emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
-      emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
+      count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
+      count_or_emit_insn (dest,
+			  gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
+      return;
     }
-  else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
+
+  if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
     {
       /* li; xoris */
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
-      emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
-      emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
-					 GEN_INT ((ud2 ^ 0xffff) << 16)));
+      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
+      count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp,
+					     GEN_INT ((ud2 ^ 0xffff) << 16)));
+      return;
     }
-  else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
-	   || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
-	   || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
-	   || can_be_built_by_li_and_rldic (c, &shift, &mask))
+
+  int shift;
+  HOST_WIDE_INT mask;
+  if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
+      || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
+      || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
+      || can_be_built_by_li_and_rldic (c, &shift, &mask))
     {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      /* li/lis; rldicX */
       unsigned HOST_WIDE_INT imm = (c | ~mask);
       imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
 
-      emit_move_insn (temp, GEN_INT (imm));
+      count_or_emit_insn (temp, GEN_INT (imm));
       if (shift != 0)
 	temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
       if (mask != HOST_WIDE_INT_M1)
 	temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
-      emit_move_insn (dest, temp);
-    }
-  else if (ud3 == 0 && ud4 == 0)
-    {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      count_or_emit_insn (dest, temp);
 
-      gcc_assert (ud2 & 0x8000);
+      return;
+    }
 
-      if (ud1 == 0)
-	{
-	  /* lis; rldicl */
-	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
-	  emit_move_insn (dest,
-			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
-	}
-      else if (!(ud1 & 0x8000))
+  if (ud3 == 0 && ud4 == 0)
+    {
+      gcc_assert ((ud2 & 0x8000) && ud1 != 0);
+      if (!(ud1 & 0x8000))
 	{
 	  /* li; oris */
-	  emit_move_insn (temp, GEN_INT (ud1));
-	  emit_move_insn (dest,
-			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
+	  count_or_emit_insn (temp, GEN_INT (ud1));
+	  count_or_emit_insn (dest,
+			      gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
+	  return;
 	}
-      else
-	{
-	  /* lis; ori; rldicl */
-	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
-	  emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
-	  emit_move_insn (dest,
+
+      /* lis; ori; rldicl */
+      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
+      count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+      count_or_emit_insn (dest,
 			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
-	}
+      return;
     }
-  else if (ud1 == ud3 && ud2 == ud4)
+
+  if (ud1 == ud3 && ud2 == ud4)
     {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
       HOST_WIDE_INT num = (ud2 << 16) | ud1;
-      rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
+      rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
+
       rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
       rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
-      emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
+      count_or_emit_insn (dest, gen_rtx_IOR (DImode, one, two));
+      return;
     }
-  else if ((ud4 == 0xffff && (ud3 & 0x8000))
-	   || (ud4 == 0 && ! (ud3 & 0x8000)))
-    {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
 
-      emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
+  if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
+    {
+      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
       if (ud2 != 0)
-	emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
-      emit_move_insn (ud1 != 0 ? temp : dest,
-		      gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
+	count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
+      count_or_emit_insn (ud1 != 0 ? temp : dest,
+			  gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
       if (ud1 != 0)
-	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+	count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+      return;
     }
-  else if (TARGET_PREFIXED)
+
+  if (TARGET_PREFIXED)
     {
       if (can_create_pseudo_p ())
 	{
-	  /* pli A,L + pli B,H + rldimi A,B,32,0.  */
-	  temp = gen_reg_rtx (DImode);
-	  rtx temp1 = gen_reg_rtx (DImode);
-	  emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
-	  emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
-
-	  emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
-					   GEN_INT (0xffffffff)));
+	  /* pli A,L; pli B,H; rldimi A,B,32,0.  */
+	  rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode);
+	  count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3));
+	  count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
+	  count_or_emit_insn (dest, temp, temp1);
+	  return;
 	}
-      else
-	{
-	  /* pli A,H + sldi A,32 + paddi A,A,L.  */
-	  emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
 
-	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
+      /* There may be 1 insn inaccurate because of no info about dest.  */
+      bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false;
 
-	  bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
+      /* pli A,H; sldi A,32; paddi A,A,L.  */
+      count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3));
+      count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
 
-	  /* Use paddi for the low 32 bits.  */
-	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
-	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
+      /* Use paddi for the low 32 bits.  */
+      if (ud2 != 0 && ud1 != 0 && can_use_paddi)
+	count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest,
 						GEN_INT ((ud2 << 16) | ud1)));
-
-	  /* Use oris, ori for low 32 bits.  */
-	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
-	    emit_move_insn (dest,
+      /* Use oris, ori for low 32 bits.  */
+      if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
+	count_or_emit_insn (dest,
 			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
-	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
-	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
-	}
+      if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
+	count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+      return;
     }
-  else
+
+  if (can_create_pseudo_p ())
     {
-      if (can_create_pseudo_p ())
-	{
-	  /* lis HIGH,UD4 ; ori HIGH,UD3 ;
-	     lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
-	  rtx high = gen_reg_rtx (DImode);
-	  rtx low = gen_reg_rtx (DImode);
-	  HOST_WIDE_INT num = (ud2 << 16) | ud1;
-	  rs6000_emit_set_long_const (low, sext_hwi (num, 32));
-	  num = (ud4 << 16) | ud3;
-	  rs6000_emit_set_long_const (high, sext_hwi (num, 32));
-	  emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
-					   GEN_INT (0xffffffff)));
-	}
-      else
-	{
-	  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
-	     oris DEST,UD2 ; ori DEST,UD1.  */
-	  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
-	  if (ud3 != 0)
-	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
+      /* lis HIGH,UD4 ; ori HIGH,UD3 ;
+	 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
+      rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
+      rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
+      HOST_WIDE_INT num = (ud2 << 16) | ud1;
+      rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
+      num = (ud4 << 16) | ud3;
+      rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
 
-	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
-	  if (ud2 != 0)
-	    emit_move_insn (dest,
-			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
-	  if (ud1 != 0)
-	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
-	}
+      count_or_emit_insn (dest, high, low);
+      return;
     }
+
+  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
+     oris DEST,UD2 ; ori DEST,UD1.  */
+  count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
+  if (ud3 != 0)
+    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
+
+  count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
+  if (ud2 != 0)
+    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
+  if (ud1 != 0)
+    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+
+  return;
 }
 
 /* Helper for the following.  Get rid of [r+r] memory refs
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH V3 2/3] Using pli for constant splitting
  2023-12-06  5:24 [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo
@ 2023-12-06  5:24 ` Jiufu Guo
  2023-12-07  6:12   ` Kewen.Lin
  2023-12-06  5:24 ` [PATCH V3 3/3] split complicate constant to memory Jiufu Guo
  2023-12-07  6:01 ` [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin
  2 siblings, 1 reply; 7+ messages in thread
From: Jiufu Guo @ 2023-12-06  5:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu

Hi,

When building a constant such as r120=0x66666666, which does not fit 'li'
or 'lis', 'pli' is used to build the constant via 'emit_move_insn'.

However, for a complicated constant, e.g. 0x6666666666666666ULL, when using
'rs6000_emit_set_long_const' to split the constant recursively, it fails to
use 'pli' to build each half of the constant: 0x66666666.

'rs6000_emit_set_long_const' can be updated to use 'pli' to build a half
of the constant when necessary.  For example, for 0x6666666666666666ULL,
"pli 3,1717986918; rldimi 3,3,32,0" can be used.

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636567.html
This version is refreshed and adds a new testcase.

Bootstrap&regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use
	pli for 34bit constant.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/const_split_pli.c: New test.

---
 gcc/config/rs6000/rs6000.cc                        | 7 +++++++
 gcc/testsuite/gcc.target/powerpc/const_split_pli.c | 9 +++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/const_split_pli.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index dbdc72dce5d..2e074a21a05 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10509,6 +10509,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
 				       GEN_INT (0xffffffff)));
   };
 
+  if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
+    {
+      /* li/lis/pli */
+      count_or_emit_insn (dest, GEN_INT (c));
+      return;
+    }
+
   if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
       || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
     {
diff --git a/gcc/testsuite/gcc.target/powerpc/const_split_pli.c b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c
new file mode 100644
index 00000000000..626c93084aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target power10_ok } */
+
+unsigned long long msk66() { return 0x6666666666666666ULL; }
+
+/* { dg-final { scan-assembler-times {\mpli\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mli\M} } } */
+/* { dg-final { scan-assembler-not {\mlis\M} } } */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH V3 3/3] split complicate constant to memory
  2023-12-06  5:24 [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo
  2023-12-06  5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo
@ 2023-12-06  5:24 ` Jiufu Guo
  2023-12-07  6:01 ` [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin
  2 siblings, 0 replies; 7+ messages in thread
From: Jiufu Guo @ 2023-12-06  5:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu

Hi,

Sometimes, a complicated constant needs 3 (or more)
instructions to build.  Generally speaking, this is not
as fast as loading it from the constant pool (as per a few
discussions in PR63281):
* "ld" is one instruction.  If we consider the "address/toc"
  adjustment, we may count it as 2 instructions (the high part
  of the address computation could be further optimized to a
  nop by the linker).  And "pld" may need fewer cycles.
* In testing (SPEC2017), we get a better/more stable runtime
  if the threshold is set to "> 2" (compared with "> 3").

As tested on SPEC2017, for visible performance changes, we
can find a runtime improvement on 500.perlbench_r of about
1.8% (-O2, P10) with the patch.  As for performance
degradations on other benchmarks, as investigated, those
regressions are not caused by this patch.

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636566.html
This version is refreshed based on the latest code.

Bootstrap & regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

	PR target/63281

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_const): Update to split
	complicate constant to memory.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/const_anchors.c: Update to test final-rtl.	
	* gcc.target/powerpc/parall_5insn_const.c: Update to keep original test
	point.
	* gcc.target/powerpc/pr106550.c: Likewise.
	* gcc.target/powerpc/pr106550_1.c: Likewise.
	* gcc.target/powerpc/pr87870.c: Update according to latest behavior.
	* gcc.target/powerpc/pr93012.c: Likewise.

---
 gcc/config/rs6000/rs6000.cc                     | 16 ++++++++++++++++
 .../gcc.target/powerpc/const_anchors.c          |  5 ++---
 .../gcc.target/powerpc/parall_5insn_const.c     | 14 ++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr106550.c     | 17 +++++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr106550_1.c   | 15 +++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr87870.c      |  5 ++++-
 gcc/testsuite/gcc.target/powerpc/pr93012.c      |  5 ++++-
 7 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 2e074a21a05..e44a6da91ae 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10271,6 +10271,22 @@ rs6000_emit_set_const (rtx dest, rtx source)
 	  c = sext_hwi (c, 32);
 	  emit_move_insn (lo, GEN_INT (c));
 	}
+
+      /* If it can be stored to the constant pool and profitable.  */
+      else if (base_reg_operand (dest, mode)
+	       && num_insns_constant (source, mode) > 2)
+	{
+	  rtx sym = force_const_mem (mode, source);
+	  if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
+	      && use_toc_relative_ref (XEXP (sym, 0), mode))
+	    {
+	      rtx toc = create_TOC_reference (XEXP (sym, 0), copy_rtx (dest));
+	      sym = gen_const_mem (mode, toc);
+	      set_mem_alias_set (sym, get_TOC_alias_set ());
+	    }
+
+	  emit_insn (gen_rtx_SET (dest, sym));
+	}
       else
 	rs6000_emit_set_long_const (dest, c);
       break;
diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
index 542e2674b12..188744165f2 100644
--- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c
+++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target has_arch_ppc64 } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fdump-rtl-final" } */
 
 #define C1 0x2351847027482577ULL
 #define C2 0x2351847027482578ULL
@@ -16,5 +16,4 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long b)
   if (b)
     *a++ = C2;
 }
-
-/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */
+/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
index e3a9a7264cf..df0690b90be 100644
--- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
+++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
@@ -9,8 +9,18 @@
 void __attribute__ ((noinline)) foo (unsigned long long *a)
 {
   /* 2 lis + 2 ori + 1 rldimi for each constant.  */
-  *a++ = 0x800aabcdc167fa16ULL;
-  *a++ = 0x7543a876867f616ULL;
+  {
+    register long long d asm("r0") = 0x800aabcdc167fa16ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+  {
+    register long long d asm("r0") = 0x7543a876867f616ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL};
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
index 74e395331ab..5eca2b2f701 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr106550.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
@@ -1,12 +1,25 @@
 /* PR target/106550 */
 /* { dg-options "-O2 -mdejagnu-cpu=power10" } */
 /* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target has_arch_ppc64 } */
 
 void
 foo (unsigned long long *a)
 {
-  *a++ = 0x020805006106003; /* pli+pli+rldimi */
-  *a++ = 0x2351847027482577;/* pli+pli+rldimi */  
+  {
+    /* pli+pli+rldimi */
+    register long long d asm("r0") = 0x020805006106003ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+  {
+    /* pli+pli+rldimi */  
+    register long long d asm("r0") = 0x2351847027482577ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 /* { dg-final { scan-assembler-times {\mpli\M} 4 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
index 5ab40d71a56..80e6b817dff 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
@@ -13,8 +13,19 @@ foo (unsigned long long *a)
   asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
   *a++ = n;
 
-  *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris */
-  *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori */
+  {
+    register long long d asm("r0") = 0x235a8470a7480000ULL; /* pli+sldi+oris */
+    long long n;
+    asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+
+  {
+    register long long d asm("r0") = 0x23a184700000b677ULL; /* pli+sldi+ori */
+    long long n;
+    asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 /* { dg-final { scan-assembler-times {\mpli\M} 3 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c
index d2108ac3386..5fee06744ae 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr87870.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c
@@ -25,4 +25,7 @@ test3 (void)
   return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad;
 }
 
-/* { dg-final { scan-assembler-not {\mld\M} } } */
+/* test3 using "ld" to load the value for r3 and r4.
+   test0, test1 and test2 are using "li".  */
+/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mli\M} 6 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
index 4f764d0576f..b9e869e4285 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
@@ -10,4 +10,7 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
 unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
 unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
 
-/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
+/* { dg-final { scan-assembler-times {\mpli\M} 4 { target has_arch_pwr10 }} } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 7 { target has_arch_pwr10 } } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 3 { target { ! has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mld\M} 4 { target { ! has_arch_pwr10 } } } } */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns
  2023-12-06  5:24 [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo
  2023-12-06  5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo
  2023-12-06  5:24 ` [PATCH V3 3/3] split complicate constant to memory Jiufu Guo
@ 2023-12-07  6:01 ` Kewen.Lin
  2023-12-08  3:30   ` Jiufu Guo
  2 siblings, 1 reply; 7+ messages in thread
From: Kewen.Lin @ 2023-12-07  6:01 UTC (permalink / raw)
  To: Jiufu Guo; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches

Hi Jeff,

on 2023/12/6 13:24, Jiufu Guo wrote:
> Hi,
> 
> Trunk gcc supports more constants to be built via two instructions:
> e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic".
> And then num_insns_constant should also be updated.
> 
> Function "rs6000_emit_set_long_const" is used to build complicated
> constants; and "num_insns_constant_gpr" is used to compute 'how
> many instructions are needed" to build the constant. So, these 
> two functions should be aligned.
> 
> The idea of this patch is: to reuse "rs6000_emit_set_long_const" to
> compute/record the instruction number(when computing the insn_num, 
> then do not emit instructions).
> 
> Compare with the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636565.html
> This version updates "rs6000_emit_set_long_const" to use a condition
> if to select either "computing insn number" or "emitting the insn".
> And put them together to avoid misalign in the future.
> 
> Bootstrap & regtest pass ppc64{,le}.
> Is this ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new
> 	parameter to record number of instructions to build the constant.
> 	(num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute
> 	num_insn.
> 
> ---
>  gcc/config/rs6000/rs6000.cc | 272 ++++++++++++++++++------------------
>  1 file changed, 137 insertions(+), 135 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 3dfd79c4c43..dbdc72dce5d 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
> -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
> +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
>  static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
>  static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
>  static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
> @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
>  
>    else if (TARGET_POWERPC64)
>      {
> -      HOST_WIDE_INT low = sext_hwi (value, 32);
> -      HOST_WIDE_INT high = value >> 31;
> -
> -      if (high == 0 || high == -1)
> -	return 2;
> -
> -      high >>= 1;
> -
> -      if (low == 0 || low == high)
> -	return num_insns_constant_gpr (high) + 1;
> -      else if (high == 0)
> -	return num_insns_constant_gpr (low) + 1;
> -      else
> -	return (num_insns_constant_gpr (high)
> -		+ num_insns_constant_gpr (low) + 1);
> +      int num_insns = 0;
> +      rs6000_emit_set_long_const (NULL, value, &num_insns);

Nit: Maybe nullptr to align with the others in this patch?

> +      return num_insns;
>      }
>  
>    else
> @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>  
>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>     Output insns to set DEST equal to the constant C as a series of
> -   lis, ori and shl instructions.  */
> +   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
> +   only increase *NUM_INSNS as the number of insns, and do not output
> +   real insns.  */

Nit: Maybe s/output real/emit any/.

>  
>  static void
> -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
> +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>  {
> -  rtx temp;
> -  int shift;
> -  HOST_WIDE_INT mask;
>    HOST_WIDE_INT ud1, ud2, ud3, ud4;
>  
>    ud1 = c & 0xffff;
> @@ -10509,168 +10496,183 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>    ud3 = (c >> 32) & 0xffff;
>    ud4 = (c >> 48) & 0xffff;
>  
> -  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
> -      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
> -    emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
> +  /* This lambda is used to emit one insn or just increase the insn count.
> +     When counting the insn number, no need to emit the insn.  Here, two
> +     kinds of insns are needed: move and rldimi. */

Can we make the latter a bit more generic?  Like something below?

> +  auto count_or_emit_insn = [&num_insns] (rtx dest, rtx op1, rtx op2 = NULL) {
> +    if (num_insns)
> +      (*num_insns)++;

Nit: Make it early return.

> +    else if (!op2)
> +      emit_move_insn (dest, op1);
> +    else
> +      emit_insn (gen_rotldi3_insert_3 (dest, op1, GEN_INT (32), op2,
> +				       GEN_INT (0xffffffff)));


[&num_insns] (rtx dest_or_insn, rtx src)

if (src)
  emit_move_insn (dest_or_insn, src);
else
  emit_insn (dest_or_insn);


> +  };
>  
> -  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
> -	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
> +  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
> +      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
>      {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      /* li */
> +      count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
> +      return;
> +    }
> +
> +  rtx temp = num_insns ? nullptr
> +		       : can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest;

Nit: Maybe

temp = (num_insns || !can_create_pseudo_p ()) ? dest: gen_reg_rtx (DImode);

since NULL is passed as dest when only counting insns (num_insns is non-null).

>  
> -      emit_move_insn (ud1 != 0 ? temp : dest,
> -		      GEN_INT (sext_hwi (ud2 << 16, 32)));
> +  if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
> +      || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
> +    {
> +      /* lis[; ori] */
> +      count_or_emit_insn (ud1 != 0 ? temp : dest,
> +			  GEN_INT (sext_hwi (ud2 << 16, 32)));
>        if (ud1 != 0)
> -	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +	count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +      return;
>      }
> -  else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
> +
> +  if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
>      {
>        /* lis; xoris */
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> -      emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
> -      emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
> +      count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
> +      count_or_emit_insn (dest,
> +			  gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
> +      return;
>      }
> -  else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
> +
> +  if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
>      {
>        /* li; xoris */
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> -      emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
> -      emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
> -					 GEN_INT ((ud2 ^ 0xffff) << 16)));
> +      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
> +      count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp,
> +					     GEN_INT ((ud2 ^ 0xffff) << 16)));
> +      return;
>      }
> -  else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
> -	   || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
> -	   || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
> -	   || can_be_built_by_li_and_rldic (c, &shift, &mask))
> +
> +  int shift;
> +  HOST_WIDE_INT mask;
> +  if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
> +      || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
> +      || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
> +      || can_be_built_by_li_and_rldic (c, &shift, &mask))
>      {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      /* li/lis; rldicX */
>        unsigned HOST_WIDE_INT imm = (c | ~mask);
>        imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
>  
> -      emit_move_insn (temp, GEN_INT (imm));
> +      count_or_emit_insn (temp, GEN_INT (imm));
>        if (shift != 0)
>  	temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
>        if (mask != HOST_WIDE_INT_M1)
>  	temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
> -      emit_move_insn (dest, temp);
> -    }
> -  else if (ud3 == 0 && ud4 == 0)
> -    {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      count_or_emit_insn (dest, temp);
>  
> -      gcc_assert (ud2 & 0x8000);
> +      return;
> +    }
>  
> -      if (ud1 == 0)
> -	{
> -	  /* lis; rldicl */
> -	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
> -	  emit_move_insn (dest,
> -			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
> -	}
> -      else if (!(ud1 & 0x8000))
> +  if (ud3 == 0 && ud4 == 0)
> +    {
> +      gcc_assert ((ud2 & 0x8000) && ud1 != 0);
> +      if (!(ud1 & 0x8000))
>  	{
>  	  /* li; oris */
> -	  emit_move_insn (temp, GEN_INT (ud1));
> -	  emit_move_insn (dest,
> -			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
> +	  count_or_emit_insn (temp, GEN_INT (ud1));
> +	  count_or_emit_insn (dest,
> +			      gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
> +	  return;
>  	}
> -      else
> -	{
> -	  /* lis; ori; rldicl */
> -	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
> -	  emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> -	  emit_move_insn (dest,
> +
> +      /* lis; ori; rldicl */
> +      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
> +      count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +      count_or_emit_insn (dest,
>  			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
> -	}
> +      return;
>      }
> -  else if (ud1 == ud3 && ud2 == ud4)
> +
> +  if (ud1 == ud3 && ud2 == ud4)
>      {

Nit: Like the others, it's still preferred to have a comment indicating
what the insn sequence is for this hunk, ...

> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>        HOST_WIDE_INT num = (ud2 << 16) | ud1;
> -      rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
> +      rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
> +
>        rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
>        rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
> -      emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
> +      count_or_emit_insn (dest, gen_rtx_IOR (DImode, one, two));
> +      return;
>      }
> -  else if ((ud4 == 0xffff && (ud3 & 0x8000))
> -	   || (ud4 == 0 && ! (ud3 & 0x8000)))
> -    {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>  
> -      emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
> +  if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
> +    {

... and this.

> +      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
>        if (ud2 != 0)
> -	emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
> -      emit_move_insn (ud1 != 0 ? temp : dest,
> -		      gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
> +	count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
> +      count_or_emit_insn (ud1 != 0 ? temp : dest,
> +			  gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
>        if (ud1 != 0)
> -	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +	count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +      return;
>      }
> -  else if (TARGET_PREFIXED)
> +
> +  if (TARGET_PREFIXED)
>      {
>        if (can_create_pseudo_p ())
>  	{
> -	  /* pli A,L + pli B,H + rldimi A,B,32,0.  */
> -	  temp = gen_reg_rtx (DImode);
> -	  rtx temp1 = gen_reg_rtx (DImode);
> -	  emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
> -	  emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
> -
> -	  emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
> -					   GEN_INT (0xffffffff)));
> +	  /* pli A,L; pli B,H; rldimi A,B,32,0.  */
> +	  rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode);
> +	  count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3));
> +	  count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
> +	  count_or_emit_insn (dest, temp, temp1);
> +	  return;
>  	}
> -      else
> -	{
> -	  /* pli A,H + sldi A,32 + paddi A,A,L.  */
> -	  emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
>  
> -	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
> +      /* There may be 1 insn inaccurate because of no info about dest.  */
> +      bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false;

Nit: Move this line ...

>  
> -	  bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
> +      /* pli A,H; sldi A,32; paddi A,A,L.  */
> +      count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3));
> +      count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>  

... here, just before its use.

The others look good to me, thanks!

BR,
Kewen

> -	  /* Use paddi for the low 32 bits.  */
> -	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
> -	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
> +      /* Use paddi for the low 32 bits.  */
> +      if (ud2 != 0 && ud1 != 0 && can_use_paddi)
> +	count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest,
>  						GEN_INT ((ud2 << 16) | ud1)));
> -
> -	  /* Use oris, ori for low 32 bits.  */
> -	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
> -	    emit_move_insn (dest,
> +      /* Use oris, ori for low 32 bits.  */
> +      if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
> +	count_or_emit_insn (dest,
>  			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> -	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> -	}
> +      if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
> +	count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> +      return;
>      }
> -  else
> +
> +  if (can_create_pseudo_p ())
>      {
> -      if (can_create_pseudo_p ())
> -	{
> -	  /* lis HIGH,UD4 ; ori HIGH,UD3 ;
> -	     lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
> -	  rtx high = gen_reg_rtx (DImode);
> -	  rtx low = gen_reg_rtx (DImode);
> -	  HOST_WIDE_INT num = (ud2 << 16) | ud1;
> -	  rs6000_emit_set_long_const (low, sext_hwi (num, 32));
> -	  num = (ud4 << 16) | ud3;
> -	  rs6000_emit_set_long_const (high, sext_hwi (num, 32));
> -	  emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
> -					   GEN_INT (0xffffffff)));
> -	}
> -      else
> -	{
> -	  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
> -	     oris DEST,UD2 ; ori DEST,UD1.  */
> -	  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
> -	  if (ud3 != 0)
> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
> +      /* lis HIGH,UD4 ; ori HIGH,UD3 ;
> +	 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
> +      rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
> +      rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
> +      HOST_WIDE_INT num = (ud2 << 16) | ud1;
> +      rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
> +      num = (ud4 << 16) | ud3;
> +      rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
>  
> -	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
> -	  if (ud2 != 0)
> -	    emit_move_insn (dest,
> -			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> -	  if (ud1 != 0)
> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> -	}
> +      count_or_emit_insn (dest, high, low);
> +      return;
>      }
> +
> +  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
> +     oris DEST,UD2 ; ori DEST,UD1.  */
> +  count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
> +  if (ud3 != 0)
> +    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
> +
> +  count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
> +  if (ud2 != 0)
> +    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> +  if (ud1 != 0)
> +    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> +
> +  return;
>  }
>  
>  /* Helper for the following.  Get rid of [r+r] memory refs


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V3 2/3] Using pli for constant splitting
  2023-12-06  5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo
@ 2023-12-07  6:12   ` Kewen.Lin
  2023-12-08  3:32     ` Jiufu Guo
  0 siblings, 1 reply; 7+ messages in thread
From: Kewen.Lin @ 2023-12-07  6:12 UTC (permalink / raw)
  To: Jiufu Guo; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches

Hi Jeff,

on 2023/12/6 13:24, Jiufu Guo wrote:
> Hi,
> 
> For constant building e.g. r120=0x66666666, which does not fit 'li or lis',
> 'pli' is used to build this constant via 'emit_move_insn'.
> 
> While for a complicated constant, e.g. 0x6666666666666666ULL, when using
> 'rs6000_emit_set_long_const' to split the constant recursively, it fails to
> use 'pli' to build the half part constant: 0x66666666.
> 
> 'rs6000_emit_set_long_const' could be updated to use 'pli' to build half
> part of the constant when necessary.  For example: 0x6666666666666666ULL,
> "pli 3,1717986918; rldimi 3,3,32,0" can be used.
> 
> Compare with previous:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636567.html
> This verion is refreshed and added with a new testcase.
> 
> Bootstrap&regtest pass on ppc64{,le}.
> Is this ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use
> 	pli for 34bit constant.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/powerpc/const_split_pli.c: New test.

Nit: Now we have:

gcc/testsuite/gcc.target/powerpc/const-build.c
gcc/testsuite/gcc.target/powerpc/const_anchors.c
gcc/testsuite/gcc.target/powerpc/const-compare.c

I'd prefer this new test case to be named like const-build-1.c
(with a detailed comment inside) or const-build-split-pli.c,
to align with the existing ones.

> 
> ---
>  gcc/config/rs6000/rs6000.cc                        | 7 +++++++
>  gcc/testsuite/gcc.target/powerpc/const_split_pli.c | 9 +++++++++
>  2 files changed, 16 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/const_split_pli.c
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index dbdc72dce5d..2e074a21a05 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10509,6 +10509,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>  				       GEN_INT (0xffffffff)));
>    };
>  
> +  if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
> +    {
> +      /* li/lis/pli */
> +      count_or_emit_insn (dest, GEN_INT (c));
> +      return;
> +    }
> +
>    if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>        || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
>      {
> diff --git a/gcc/testsuite/gcc.target/powerpc/const_split_pli.c b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c
> new file mode 100644
> index 00000000000..626c93084aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-O2" } */

It needs -mdejagnu-cpu=power10 as well.

> +/* { dg-require-effective-target power10_ok } */
> +
> +unsigned long long msk66() { return 0x6666666666666666ULL; }
> +
> +/* { dg-final { scan-assembler-times {\mpli\M} 1 } } */
> +/* { dg-final { scan-assembler-not {\mli\M} } } */
> +/* { dg-final { scan-assembler-not {\mlis\M} } } */

OK for trunk with the above nits tweaked, thanks!

BR,
Kewen

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns
  2023-12-07  6:01 ` [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin
@ 2023-12-08  3:30   ` Jiufu Guo
  0 siblings, 0 replies; 7+ messages in thread
From: Jiufu Guo @ 2023-12-08  3:30 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches


Hi,

Thanks for your always kind and helpful review!!

"Kewen.Lin" <linkw@linux.ibm.com> writes:

> Hi Jeff,
>
> on 2023/12/6 13:24, Jiufu Guo wrote:
>> Hi,
>> 
>> Trunk gcc supports more constants to be built via two instructions:
>> e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic".
>> And then num_insns_constant should also be updated.
>> 
>> Function "rs6000_emit_set_long_const" is used to build complicated
>> constants; and "num_insns_constant_gpr" is used to compute 'how
>> many instructions are needed" to build the constant. So, these 
>> two functions should be aligned.
>> 
>> The idea of this patch is: to reuse "rs6000_emit_set_long_const" to
>> compute/record the instruction number(when computing the insn_num, 
>> then do not emit instructions).
>> 
>> Compare with the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636565.html
>> This version updates "rs6000_emit_set_long_const" to use a condition
>> if to select either "computing insn number" or "emitting the insn".
>> And put them together to avoid misalign in the future.
>> 
>> Bootstrap & regtest pass ppc64{,le}.
>> Is this ok for trunk?
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>> gcc/ChangeLog:
>> 
>> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new
>> 	parameter to record number of instructions to build the constant.
>> 	(num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute
>> 	num_insn.
>> 
>> ---
>>  gcc/config/rs6000/rs6000.cc | 272 ++++++++++++++++++------------------
>>  1 file changed, 137 insertions(+), 135 deletions(-)
>> 
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 3dfd79c4c43..dbdc72dce5d 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
>>  static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
>>  static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
>>  static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
>> -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
>> +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
>>  static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
>>  static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
>>  static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
>> @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
>>  
>>    else if (TARGET_POWERPC64)
>>      {
>> -      HOST_WIDE_INT low = sext_hwi (value, 32);
>> -      HOST_WIDE_INT high = value >> 31;
>> -
>> -      if (high == 0 || high == -1)
>> -	return 2;
>> -
>> -      high >>= 1;
>> -
>> -      if (low == 0 || low == high)
>> -	return num_insns_constant_gpr (high) + 1;
>> -      else if (high == 0)
>> -	return num_insns_constant_gpr (low) + 1;
>> -      else
>> -	return (num_insns_constant_gpr (high)
>> -		+ num_insns_constant_gpr (low) + 1);
>> +      int num_insns = 0;
>> +      rs6000_emit_set_long_const (NULL, value, &num_insns);
>
> Nit: Maybe nullptr to align with the others in this patch?
ok.
>
>> +      return num_insns;
>>      }
>>  
>>    else
>> @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>>  
>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>>     Output insns to set DEST equal to the constant C as a series of
>> -   lis, ori and shl instructions.  */
>> +   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
>> +   only increase *NUM_INSNS as the number of insns, and do not output
>> +   real insns.  */
>
> Nit: Maybe s/output real/emit any/.
Thanks.
>
>>  
>>  static void
>> -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>> +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>  {
>> -  rtx temp;
>> -  int shift;
>> -  HOST_WIDE_INT mask;
>>    HOST_WIDE_INT ud1, ud2, ud3, ud4;
>>  
>>    ud1 = c & 0xffff;
>> @@ -10509,168 +10496,183 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>>    ud3 = (c >> 32) & 0xffff;
>>    ud4 = (c >> 48) & 0xffff;
>>  
>> -  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>> -      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
>> -    emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
>> +  /* This lambda is used to emit one insn or just increase the insn count.
>> +     When counting the insn number, no need to emit the insn.  Here, two
>> +     kinds of insns are needed: move and rldimi. */
>
> Can we make the latter a bit more generic?  Like something below?
Great suggestion! Thanks.
>
>> +  auto count_or_emit_insn = [&num_insns] (rtx dest, rtx op1, rtx op2 = NULL) {
>> +    if (num_insns)
>> +      (*num_insns)++;
>
> Nit: Make it early return.
ok.
>
>> +    else if (!op2)
>> +      emit_move_insn (dest, op1);
>> +    else
>> +      emit_insn (gen_rotldi3_insert_3 (dest, op1, GEN_INT (32), op2,
>> +				       GEN_INT (0xffffffff)));
>
>
> [&num_insns] (rtx dest_or_insn, rtx src)
>
> if (src)
>   emit_move_insn (dest_or_insn, src);
> else
>   emit_insn (dest_or_insn);
>

This could support other gen_X in future.  Thanks!

>
>> +  };
>>  
>> -  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
>> -	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
>> +  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>> +      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
>>      {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +      /* li */
>> +      count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
>> +      return;
>> +    }
>> +
>> +  rtx temp = num_insns ? nullptr
>> +		       : can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest;
>
> Nit: Maybe
>
> temp = (num_insns || !can_create_pseudo_p ()) ? dest: gen_reg_rtx (DImode);
>
> since NULL passed as dest for num_insns.
ok.
>
>>  
>> -      emit_move_insn (ud1 != 0 ? temp : dest,
>> -		      GEN_INT (sext_hwi (ud2 << 16, 32)));
>> +  if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
>> +      || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
>> +    {
>> +      /* lis[; ori] */
>> +      count_or_emit_insn (ud1 != 0 ? temp : dest,
>> +			  GEN_INT (sext_hwi (ud2 << 16, 32)));
>>        if (ud1 != 0)
>> -	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +	count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +      return;
>>      }
>> -  else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
>> +
>> +  if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
>>      {
>>        /* lis; xoris */
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> -      emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
>> -      emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
>> +      count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
>> +      count_or_emit_insn (dest,
>> +			  gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
>> +      return;
>>      }
>> -  else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
>> +
>> +  if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
>>      {
>>        /* li; xoris */
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> -      emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
>> -      emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>> -					 GEN_INT ((ud2 ^ 0xffff) << 16)));
>> +      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
>> +      count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp,
>> +					     GEN_INT ((ud2 ^ 0xffff) << 16)));
>> +      return;
>>      }
>> -  else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
>> -	   || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
>> -	   || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
>> -	   || can_be_built_by_li_and_rldic (c, &shift, &mask))
>> +
>> +  int shift;
>> +  HOST_WIDE_INT mask;
>> +  if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
>> +      || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
>> +      || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
>> +      || can_be_built_by_li_and_rldic (c, &shift, &mask))
>>      {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +      /* li/lis; rldicX */
>>        unsigned HOST_WIDE_INT imm = (c | ~mask);
>>        imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
>>  
>> -      emit_move_insn (temp, GEN_INT (imm));
>> +      count_or_emit_insn (temp, GEN_INT (imm));
>>        if (shift != 0)
>>  	temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
>>        if (mask != HOST_WIDE_INT_M1)
>>  	temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
>> -      emit_move_insn (dest, temp);
>> -    }
>> -  else if (ud3 == 0 && ud4 == 0)
>> -    {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +      count_or_emit_insn (dest, temp);
>>  
>> -      gcc_assert (ud2 & 0x8000);
>> +      return;
>> +    }
>>  
>> -      if (ud1 == 0)
>> -	{
>> -	  /* lis; rldicl */
>> -	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
>> -	  emit_move_insn (dest,
>> -			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
>> -	}
>> -      else if (!(ud1 & 0x8000))
>> +  if (ud3 == 0 && ud4 == 0)
>> +    {
>> +      gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>> +      if (!(ud1 & 0x8000))
>>  	{
>>  	  /* li; oris */
>> -	  emit_move_insn (temp, GEN_INT (ud1));
>> -	  emit_move_insn (dest,
>> -			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
>> +	  count_or_emit_insn (temp, GEN_INT (ud1));
>> +	  count_or_emit_insn (dest,
>> +			      gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
>> +	  return;
>>  	}
>> -      else
>> -	{
>> -	  /* lis; ori; rldicl */
>> -	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
>> -	  emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> -	  emit_move_insn (dest,
>> +
>> +      /* lis; ori; rldicl */
>> +      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
>> +      count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +      count_or_emit_insn (dest,
>>  			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
>> -	}
>> +      return;
>>      }
>> -  else if (ud1 == ud3 && ud2 == ud4)
>> +
>> +  if (ud1 == ud3 && ud2 == ud4)
>>      {
>
> Nit: Like the others, it's still preferred to have a comment indicating
> what's insn sequence for this hunk, ...
I understand your point.  Since the 32-bit half may be built with various
insns, it may be hard to list the exact insn sequence.  Still, I also feel
we need a comment here.
>
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>>        HOST_WIDE_INT num = (ud2 << 16) | ud1;
>> -      rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
>> +      rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
>> +
>>        rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
>>        rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
>> -      emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
>> +      count_or_emit_insn (dest, gen_rtx_IOR (DImode, one, two));
>> +      return;
>>      }
>> -  else if ((ud4 == 0xffff && (ud3 & 0x8000))
>> -	   || (ud4 == 0 && ! (ud3 & 0x8000)))
>> -    {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>>  
>> -      emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
>> +  if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
>> +    {
>
> ... and this.
ok.
>
>> +      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
>>        if (ud2 != 0)
>> -	emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
>> -      emit_move_insn (ud1 != 0 ? temp : dest,
>> -		      gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
>> +	count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
>> +      count_or_emit_insn (ud1 != 0 ? temp : dest,
>> +			  gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
>>        if (ud1 != 0)
>> -	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +	count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +      return;
>>      }
>> -  else if (TARGET_PREFIXED)
>> +
>> +  if (TARGET_PREFIXED)
>>      {
>>        if (can_create_pseudo_p ())
>>  	{
>> -	  /* pli A,L + pli B,H + rldimi A,B,32,0.  */
>> -	  temp = gen_reg_rtx (DImode);
>> -	  rtx temp1 = gen_reg_rtx (DImode);
>> -	  emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
>> -	  emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
>> -
>> -	  emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
>> -					   GEN_INT (0xffffffff)));
>> +	  /* pli A,L; pli B,H; rldimi A,B,32,0.  */
>> +	  rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode);
>> +	  count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3));
>> +	  count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
>> +	  count_or_emit_insn (dest, temp, temp1);
>> +	  return;
>>  	}
>> -      else
>> -	{
>> -	  /* pli A,H + sldi A,32 + paddi A,A,L.  */
>> -	  emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
>>  
>> -	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>> +      /* There may be 1 insn inaccurate because of no info about dest.  */
>> +      bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false;
>
> Nit: Move this line ...
>
>>  
>> -	  bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
>> +      /* pli A,H; sldi A,32; paddi A,A,L.  */
>> +      count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3));
>> +      count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>>  
>
> ... here, just before its use.
ok.
>
> The others look good to me, thanks!

Thanks again for your great comments.


BR,
Jeff (Jiufu Guo)

>
> BR,
> Kewen
>
>> -	  /* Use paddi for the low 32 bits.  */
>> -	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
>> -	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
>> +      /* Use paddi for the low 32 bits.  */
>> +      if (ud2 != 0 && ud1 != 0 && can_use_paddi)
>> +	count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest,
>>  						GEN_INT ((ud2 << 16) | ud1)));
>> -
>> -	  /* Use oris, ori for low 32 bits.  */
>> -	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
>> -	    emit_move_insn (dest,
>> +      /* Use oris, ori for low 32 bits.  */
>> +      if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
>> +	count_or_emit_insn (dest,
>>  			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
>> -	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
>> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> -	}
>> +      if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
>> +	count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> +      return;
>>      }
>> -  else
>> +
>> +  if (can_create_pseudo_p ())
>>      {
>> -      if (can_create_pseudo_p ())
>> -	{
>> -	  /* lis HIGH,UD4 ; ori HIGH,UD3 ;
>> -	     lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
>> -	  rtx high = gen_reg_rtx (DImode);
>> -	  rtx low = gen_reg_rtx (DImode);
>> -	  HOST_WIDE_INT num = (ud2 << 16) | ud1;
>> -	  rs6000_emit_set_long_const (low, sext_hwi (num, 32));
>> -	  num = (ud4 << 16) | ud3;
>> -	  rs6000_emit_set_long_const (high, sext_hwi (num, 32));
>> -	  emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
>> -					   GEN_INT (0xffffffff)));
>> -	}
>> -      else
>> -	{
>> -	  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
>> -	     oris DEST,UD2 ; ori DEST,UD1.  */
>> -	  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
>> -	  if (ud3 != 0)
>> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
>> +      /* lis HIGH,UD4 ; ori HIGH,UD3 ;
>> +	 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
>> +      rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
>> +      rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
>> +      HOST_WIDE_INT num = (ud2 << 16) | ud1;
>> +      rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
>> +      num = (ud4 << 16) | ud3;
>> +      rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
>>  
>> -	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>> -	  if (ud2 != 0)
>> -	    emit_move_insn (dest,
>> -			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
>> -	  if (ud1 != 0)
>> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> -	}
>> +      count_or_emit_insn (dest, high, low);
>> +      return;
>>      }
>> +
>> +  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
>> +     oris DEST,UD2 ; ori DEST,UD1.  */
>> +  count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
>> +  if (ud3 != 0)
>> +    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
>> +
>> +  count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>> +  if (ud2 != 0)
>> +    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
>> +  if (ud1 != 0)
>> +    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> +
>> +  return;
>>  }
>>  
>>  /* Helper for the following.  Get rid of [r+r] memory refs

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V3 2/3] Using pli for constant splitting
  2023-12-07  6:12   ` Kewen.Lin
@ 2023-12-08  3:32     ` Jiufu Guo
  0 siblings, 0 replies; 7+ messages in thread
From: Jiufu Guo @ 2023-12-08  3:32 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches


Hi,

Thanks for your insight and helpful review!

"Kewen.Lin" <linkw@linux.ibm.com> writes:

> Hi Jeff,
>
> on 2023/12/6 13:24, Jiufu Guo wrote:
>> Hi,
>> 
>> For constant building e.g. r120=0x66666666, which does not fit 'li or lis',
>> 'pli' is used to build this constant via 'emit_move_insn'.
>> 
>> While for a complicated constant, e.g. 0x6666666666666666ULL, when using
>> 'rs6000_emit_set_long_const' to split the constant recursively, it fails to
>> use 'pli' to build the half part constant: 0x66666666.
>> 
>> 'rs6000_emit_set_long_const' could be updated to use 'pli' to build half
>> part of the constant when necessary.  For example: 0x6666666666666666ULL,
>> "pli 3,1717986918; rldimi 3,3,32,0" can be used.
>> 
>> Compare with previous:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636567.html
>> This version is refreshed and adds a new testcase.
>> 
>> Bootstrap&regtest pass on ppc64{,le}.
>> Is this ok for trunk?
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>> gcc/ChangeLog:
>> 
>> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use
>> 	pli for 34bit constant.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	* gcc.target/powerpc/const_split_pli.c: New test.
>
> Nit: Now we have:
>
> gcc/testsuite/gcc.target/powerpc/const-build.c
> gcc/testsuite/gcc.target/powerpc/const_anchors.c
> gcc/testsuite/gcc.target/powerpc/const-compare.c
>
> I'd prefer this new test case to be named like const-build-1.c
> (with a detailed comment inside) or const-build-split-pli.c,
> to align with the existing ones.
Thanks!
>
>> 
>> ---
>>  gcc/config/rs6000/rs6000.cc                        | 7 +++++++
>>  gcc/testsuite/gcc.target/powerpc/const_split_pli.c | 9 +++++++++
>>  2 files changed, 16 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/const_split_pli.c
>> 
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index dbdc72dce5d..2e074a21a05 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10509,6 +10509,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>  				       GEN_INT (0xffffffff)));
>>    };
>>  
>> +  if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
>> +    {
>> +      /* li/lis/pli */
>> +      count_or_emit_insn (dest, GEN_INT (c));
>> +      return;
>> +    }
>> +
>>    if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>>        || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
>>      {
>> diff --git a/gcc/testsuite/gcc.target/powerpc/const_split_pli.c b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c
>> new file mode 100644
>> index 00000000000..626c93084aa
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c
>> @@ -0,0 +1,9 @@
>> +/* { dg-do compile { target lp64 } } */
>> +/* { dg-options "-O2" } */
>
> It needs -mdejagnu-cpu=power10 as well.
Yeap, thanks.
>
>> +/* { dg-require-effective-target power10_ok } */
>> +
>> +unsigned long long msk66() { return 0x6666666666666666ULL; }
>> +
>> +/* { dg-final { scan-assembler-times {\mpli\M} 1 } } */
>> +/* { dg-final { scan-assembler-not {\mli\M} } } */
>> +/* { dg-final { scan-assembler-not {\mlis\M} } } */
>
> OK for trunk with the above nits tweaked, thanks!
>
> BR,
> Kewen

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-12-08  3:32 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-06  5:24 [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo
2023-12-06  5:24 ` [PATCH V3 2/3] Using pli for constant splitting Jiufu Guo
2023-12-07  6:12   ` Kewen.Lin
2023-12-08  3:32     ` Jiufu Guo
2023-12-06  5:24 ` [PATCH V3 3/3] split complicate constant to memory Jiufu Guo
2023-12-07  6:01 ` [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin
2023-12-08  3:30   ` Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).