public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns
@ 2023-11-15  3:02 Jiufu Guo
  2023-11-15  3:02 ` [PATCH V2 2/3] Using pli to split 34bits constant Jiufu Guo
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Jiufu Guo @ 2023-11-15  3:02 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu

Hi,

Trunk GCC can now build more constants with two instructions, e.g.
"li/lis; xori/xoris/rldicl/rldicr/rldic", so num_insns_constant
should be updated accordingly.

Function "rs6000_emit_set_long_const" is used to build complicated
constants, and "num_insns_constant_gpr" is used to compute "how
many instructions are needed" to build the constant.  So, these
two functions should be aligned.

The idea is to reuse "rs6000_emit_set_long_const" to compute/record
the number of instructions (when computing the instruction count,
no instructions are emitted).

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634195.html
This version adds an argument to "rs6000_emit_set_long_const" to
indicate that it should compute the number of instructions instead
of emitting instructions.

Bootstrap & regtest pass ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new 
	parameter to record number of instructions to build the constant.
	(num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute
	num_insn.
	(ADJUST_INSN_NUM_AND_RET): New macro.
	(rs6000_emit_set_const): Call rs6000_emit_set_long_const with NULL
	argument.

---
 gcc/config/rs6000/rs6000.cc | 245 +++++++++++++++++++-----------------
 1 file changed, 133 insertions(+), 112 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index cc24dd5301e..ba40dd6eee4 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
-static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
+static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int *);
 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
@@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
 
   else if (TARGET_POWERPC64)
     {
-      HOST_WIDE_INT low = sext_hwi (value, 32);
-      HOST_WIDE_INT high = value >> 31;
-
-      if (high == 0 || high == -1)
-	return 2;
-
-      high >>= 1;
-
-      if (low == 0 || low == high)
-	return num_insns_constant_gpr (high) + 1;
-      else if (high == 0)
-	return num_insns_constant_gpr (low) + 1;
-      else
-	return (num_insns_constant_gpr (high)
-		+ num_insns_constant_gpr (low) + 1);
+      int num_insns = 0;
+      rs6000_emit_set_long_const (NULL, value, &num_insns);
+      return num_insns;
     }
 
   else
@@ -10284,7 +10272,7 @@ rs6000_emit_set_const (rtx dest, rtx source)
 	  emit_move_insn (lo, GEN_INT (c));
 	}
       else
-	rs6000_emit_set_long_const (dest, c);
+	rs6000_emit_set_long_const (dest, c, NULL);
       break;
 
     default:
@@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
 
 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
    Output insns to set DEST equal to the constant C as a series of
-   lis, ori and shl instructions.  */
+   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
+   only increase *NUM_INSNS as the number of insns, and do not output
+   real insns.  */
 
 static void
-rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
+rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
 {
-  rtx temp;
-  int shift;
-  HOST_WIDE_INT mask;
   HOST_WIDE_INT ud1, ud2, ud3, ud4;
 
   ud1 = c & 0xffff;
@@ -10509,41 +10496,71 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
   ud3 = (c >> 32) & 0xffff;
   ud4 = (c >> 48) & 0xffff;
 
+  /* This macro RETURNs this function after increasing *NUM_INSNS!!!  */
+#define ADJUST_INSN_NUM_AND_RET(N)                                             \
+  if (num_insns)                                                               \
+    {                                                                          \
+      *num_insns += (N);                                                       \
+      return;                                                                  \
+    }
+
   if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
-      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
-    emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
+      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
+    {
+      /* li */
+      ADJUST_INSN_NUM_AND_RET (1);
+      emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
+      return;
+    }
+
+  rtx temp = num_insns
+	       ? NULL
+	       : (!can_create_pseudo_p () ? dest : gen_reg_rtx (DImode));
 
-  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
-	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
+  if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
+      || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
     {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      /* lis[; ori] */
+      ADJUST_INSN_NUM_AND_RET (ud1 != 0 ? 2 : 1);
 
       emit_move_insn (ud1 != 0 ? temp : dest,
 		      GEN_INT (sext_hwi (ud2 << 16, 32)));
       if (ud1 != 0)
 	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+      return;
     }
-  else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
+
+  if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
     {
       /* lis; xoris */
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      ADJUST_INSN_NUM_AND_RET (2);
+
       emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
       emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
+      return;
     }
-  else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
+
+  if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
     {
       /* li; xoris */
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      ADJUST_INSN_NUM_AND_RET (2);
+
       emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
       emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
 					 GEN_INT ((ud2 ^ 0xffff) << 16)));
+      return;
     }
-  else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
-	   || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
-	   || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
-	   || can_be_built_by_li_and_rldic (c, &shift, &mask))
+
+  int shift;
+  HOST_WIDE_INT mask;
+  if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
+      || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
+      || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
+      || can_be_built_by_li_and_rldic (c, &shift, &mask))
     {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      /* li/lis; rldicX */
+      ADJUST_INSN_NUM_AND_RET (2);
+
       unsigned HOST_WIDE_INT imm = (c | ~mask);
       imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
 
@@ -10553,49 +10570,48 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
       if (mask != HOST_WIDE_INT_M1)
 	temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
       emit_move_insn (dest, temp);
+
+      return;
     }
-  else if (ud3 == 0 && ud4 == 0)
-    {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
 
-      gcc_assert (ud2 & 0x8000);
+  if (ud3 == 0 && ud4 == 0)
+    {
+      gcc_assert ((ud2 & 0x8000) && ud1 != 0);
+      ADJUST_INSN_NUM_AND_RET (!(ud1 & 0x8000) ? 2 : 3);
 
-      if (ud1 == 0)
-	{
-	  /* lis; rldicl */
-	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
-	  emit_move_insn (dest,
-			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
-	}
-      else if (!(ud1 & 0x8000))
+      if (!(ud1 & 0x8000))
 	{
 	  /* li; oris */
 	  emit_move_insn (temp, GEN_INT (ud1));
 	  emit_move_insn (dest,
 			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
+	  return;
 	}
-      else
-	{
-	  /* lis; ori; rldicl */
-	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
-	  emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
-	  emit_move_insn (dest,
-			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
-	}
+
+      /* lis; ori; rldicl */
+      emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
+      emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+      emit_move_insn (dest, gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
+      return;
     }
-  else if (ud1 == ud3 && ud2 == ud4)
+
+  if (ud1 == ud3 && ud2 == ud4)
     {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
       HOST_WIDE_INT num = (ud2 << 16) | ud1;
-      rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
+      rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
+
+      /* Adjust(+1) insn number after half part is adjusted.  */
+      ADJUST_INSN_NUM_AND_RET (1);
+
       rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
       rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
       emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
+      return;
     }
-  else if ((ud4 == 0xffff && (ud3 & 0x8000))
-	   || (ud4 == 0 && ! (ud3 & 0x8000)))
+
+  if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
     {
-      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+      ADJUST_INSN_NUM_AND_RET (2 + (ud2 != 0 ? 1 : 0) + (ud1 != 0 ? 1 : 0));
 
       emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
       if (ud2 != 0)
@@ -10604,73 +10620,78 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 		      gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
       if (ud1 != 0)
 	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+      return;
     }
-  else if (TARGET_PREFIXED)
+
+  if (TARGET_PREFIXED)
     {
       if (can_create_pseudo_p ())
 	{
-	  /* pli A,L + pli B,H + rldimi A,B,32,0.  */
-	  temp = gen_reg_rtx (DImode);
+	  /* pli A,L; pli B,H; rldimi A,B,32,0.  */
+	  ADJUST_INSN_NUM_AND_RET (3);
+
 	  rtx temp1 = gen_reg_rtx (DImode);
 	  emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
 	  emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
-
 	  emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
 					   GEN_INT (0xffffffff)));
+	  return;
 	}
-      else
-	{
-	  /* pli A,H + sldi A,32 + paddi A,A,L.  */
-	  emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
 
-	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
+      /* There may be 1 insn inaccurate because of no info about dest.  */
+      ADJUST_INSN_NUM_AND_RET (2 + (ud2 != 0 && ud1 != 0 ? 2 : 1));
 
-	  bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
+      bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
 
-	  /* Use paddi for the low 32 bits.  */
-	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
-	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
-						GEN_INT ((ud2 << 16) | ud1)));
+      /* pli A,H; sldi A,32; paddi A,A,L.  */
+      emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
+      emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
 
-	  /* Use oris, ori for low 32 bits.  */
-	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
-	    emit_move_insn (dest,
-			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
-	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
-	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
-	}
+      /* Use paddi for the low 32 bits.  */
+      if (ud2 != 0 && ud1 != 0 && can_use_paddi)
+	emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
+					    GEN_INT ((ud2 << 16) | ud1)));
+      /* Use oris, ori for low 32 bits.  */
+      if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
+	emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
+      if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
+	emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+      return;
     }
-  else
-    {
-      if (can_create_pseudo_p ())
-	{
-	  /* lis HIGH,UD4 ; ori HIGH,UD3 ;
-	     lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
-	  rtx high = gen_reg_rtx (DImode);
-	  rtx low = gen_reg_rtx (DImode);
-	  HOST_WIDE_INT num = (ud2 << 16) | ud1;
-	  rs6000_emit_set_long_const (low, sext_hwi (num, 32));
-	  num = (ud4 << 16) | ud3;
-	  rs6000_emit_set_long_const (high, sext_hwi (num, 32));
-	  emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
-					   GEN_INT (0xffffffff)));
-	}
-      else
-	{
-	  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
-	     oris DEST,UD2 ; ori DEST,UD1.  */
-	  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
-	  if (ud3 != 0)
-	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
 
-	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
-	  if (ud2 != 0)
-	    emit_move_insn (dest,
-			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
-	  if (ud1 != 0)
-	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
-	}
+  if (can_create_pseudo_p ())
+    {
+      /* lis HIGH,UD4 ; ori HIGH,UD3 ;
+	 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
+      rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
+      rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
+      HOST_WIDE_INT num = (ud2 << 16) | ud1;
+      rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
+      num = (ud4 << 16) | ud3;
+      rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
+
+      /* Adjust(+1) insn number after 'high' and 'low' parts are adjusted.  */
+      ADJUST_INSN_NUM_AND_RET (1);
+      emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
+				       GEN_INT (0xffffffff)));
+      return;
     }
+
+  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
+     oris DEST,UD2 ; ori DEST,UD1.  */
+  ADJUST_INSN_NUM_AND_RET (2 + (ud3 != 0 ? 1 : 0) + (ud2 != 0 ? 1 : 0)
+			   + (ud1 != 0 ? 1 : 0));
+  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
+  if (ud3 != 0)
+    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
+
+  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
+  if (ud2 != 0)
+    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
+  if (ud1 != 0)
+    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+
+  return;
 }
 
 /* Helper for the following.  Get rid of [r+r] memory refs
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH V2 2/3] Using pli to split 34bits constant
  2023-11-15  3:02 [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo
@ 2023-11-15  3:02 ` Jiufu Guo
  2023-11-22  9:18   ` Kewen.Lin
  2023-11-15  3:02 ` [PATCH V2 3/3] split complicate constant to memory Jiufu Guo
  2023-11-22  9:12 ` [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin
  2 siblings, 1 reply; 7+ messages in thread
From: Jiufu Guo @ 2023-11-15  3:02 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu

Hi,

For constants with 16-bit values, 'li' or 'lis' can be used to generate
the value.  For 34-bit constants, 'pli' can be used to generate the
value.  For example, for 0x6666666666666666ULL,
"pli 3,1717986918; rldimi 3,3,32,0" can be used.

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634196.html
This version updates a testcase to cover this functionality.

Bootstrap&regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use
	pli for 34bit constant.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr93012.c: Update to check pli.

---
 gcc/config/rs6000/rs6000.cc                | 9 +++++++++
 gcc/testsuite/gcc.target/powerpc/pr93012.c | 1 +
 2 files changed, 10 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index ba40dd6eee4..b277c52687b 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10504,6 +10504,15 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
       return;                                                                  \
     }
 
+  if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
+    {
+      /* li/lis/pli */
+      ADJUST_INSN_NUM_AND_RET (1);
+
+      emit_move_insn (dest, GEN_INT (c));
+      return;
+    }
+
   if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
       || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
     {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
index 4f764d0576f..a07ff764bbf 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
@@ -10,4 +10,5 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
 unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
 unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
 
+/* { dg-final { scan-assembler-times {\mpli\M} 4 { target has_arch_pwr10 }} } */
 /* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH V2 3/3] split complicate constant to memory
  2023-11-15  3:02 [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo
  2023-11-15  3:02 ` [PATCH V2 2/3] Using pli to split 34bits constant Jiufu Guo
@ 2023-11-15  3:02 ` Jiufu Guo
  2023-11-22  9:12 ` [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin
  2 siblings, 0 replies; 7+ messages in thread
From: Jiufu Guo @ 2023-11-15  3:02 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu

Hi,

Sometimes, a complicated constant needs 3 (or more) instructions
to build.  Generally speaking, this is not as fast as loading it
from the constant pool (as discussed a few times in PR63281):
* "ld" is one instruction.  If we take the "address/toc"
  adjustment into account, we may count it as 2 instructions
  (the high part of the address computation could further be
  optimized to a nop by the linker).  And "pld" may need fewer
  cycles.
* In testing (SPEC2017), the runtime is better/more stable if
  the threshold is set to "> 2" (compared with "> 3").

As tested on SPEC2017, the visible performance change with the
patch is a runtime improvement of ~1.8% on 500.perlbench_r
(-O2, P10).  As for the performance degradations seen on other
benchmarks, investigation shows that those regressions are not
caused by this patch.

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634197.html
This version updates the commit message.

Bootstrap & regtest pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)

	PR target/63281

gcc/ChangeLog:

	* config/rs6000/rs6000.cc (rs6000_emit_set_const): Update to split
	complicate constant to memory.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/const_anchors.c: Update to test final-rtl.	
	* gcc.target/powerpc/parall_5insn_const.c: Update to keep original test
	point.
	* gcc.target/powerpc/pr106550.c: Likewise.
	* gcc.target/powerpc/pr106550_1.c: Likewise.
	* gcc.target/powerpc/pr87870.c: Update according to latest behavior.
	* gcc.target/powerpc/pr93012.c: Likewise.

---
 gcc/config/rs6000/rs6000.cc                     | 16 ++++++++++++++++
 .../gcc.target/powerpc/const_anchors.c          |  5 ++---
 .../gcc.target/powerpc/parall_5insn_const.c     | 14 ++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr106550.c     | 17 +++++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr106550_1.c   | 15 +++++++++++++--
 gcc/testsuite/gcc.target/powerpc/pr87870.c      |  5 ++++-
 gcc/testsuite/gcc.target/powerpc/pr93012.c      |  4 +++-
 7 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b277c52687b..c878e1030ea 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10271,6 +10271,22 @@ rs6000_emit_set_const (rtx dest, rtx source)
 	  c = sext_hwi (c, 32);
 	  emit_move_insn (lo, GEN_INT (c));
 	}
+
+      /* If it can be stored to the constant pool and profitable.  */
+      else if (base_reg_operand (dest, mode)
+	       && num_insns_constant (source, mode) > 2)
+	{
+	  rtx sym = force_const_mem (mode, source);
+	  if (TARGET_TOC && SYMBOL_REF_P (XEXP (sym, 0))
+	      && use_toc_relative_ref (XEXP (sym, 0), mode))
+	    {
+	      rtx toc = create_TOC_reference (XEXP (sym, 0), copy_rtx (dest));
+	      sym = gen_const_mem (mode, toc);
+	      set_mem_alias_set (sym, get_TOC_alias_set ());
+	    }
+
+	  emit_insn (gen_rtx_SET (dest, sym));
+	}
       else
 	rs6000_emit_set_long_const (dest, c, NULL);
       break;
diff --git a/gcc/testsuite/gcc.target/powerpc/const_anchors.c b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
index 542e2674b12..188744165f2 100644
--- a/gcc/testsuite/gcc.target/powerpc/const_anchors.c
+++ b/gcc/testsuite/gcc.target/powerpc/const_anchors.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target has_arch_ppc64 } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fdump-rtl-final" } */
 
 #define C1 0x2351847027482577ULL
 #define C2 0x2351847027482578ULL
@@ -16,5 +16,4 @@ void __attribute__ ((noinline)) foo1 (long long *a, long long b)
   if (b)
     *a++ = C2;
 }
-
-/* { dg-final { scan-assembler-times {\maddi\M} 2 } } */
+/* { dg-final { scan-rtl-dump-times {\madddi3\M} 2 "final" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
index e3a9a7264cf..df0690b90be 100644
--- a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
+++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
@@ -9,8 +9,18 @@
 void __attribute__ ((noinline)) foo (unsigned long long *a)
 {
   /* 2 lis + 2 ori + 1 rldimi for each constant.  */
-  *a++ = 0x800aabcdc167fa16ULL;
-  *a++ = 0x7543a876867f616ULL;
+  {
+    register long long d asm("r0") = 0x800aabcdc167fa16ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+  {
+    register long long d asm("r0") = 0x7543a876867f616ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL};
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550.c b/gcc/testsuite/gcc.target/powerpc/pr106550.c
index 74e395331ab..5eca2b2f701 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr106550.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550.c
@@ -1,12 +1,25 @@
 /* PR target/106550 */
 /* { dg-options "-O2 -mdejagnu-cpu=power10" } */
 /* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target has_arch_ppc64 } */
 
 void
 foo (unsigned long long *a)
 {
-  *a++ = 0x020805006106003; /* pli+pli+rldimi */
-  *a++ = 0x2351847027482577;/* pli+pli+rldimi */  
+  {
+    /* pli+pli+rldimi */
+    register long long d asm("r0") = 0x020805006106003ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+  {
+    /* pli+pli+rldimi */  
+    register long long d asm("r0") = 0x2351847027482577ULL;
+    long long n;
+    asm("mr %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 /* { dg-final { scan-assembler-times {\mpli\M} 4 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
index 7e709fcf9d8..11878d893a4 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr106550_1.c
@@ -12,8 +12,19 @@ foo (unsigned long long *a)
   asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
   *a++ = n;
 
-  *a++ = 0x235a8470a7480000ULL; /* pli+sldi+oris */
-  *a++ = 0x23a184700000b677ULL; /* pli+sldi+ori */
+  {
+    register long long d asm("r0") = 0x235a8470a7480000ULL; /* pli+sldi+oris */
+    long long n;
+    asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
+
+  {
+    register long long d asm("r0") = 0x23a184700000b677ULL; /* pli+sldi+ori */
+    long long n;
+    asm("cntlzd %0, %1" : "=r"(n) : "r"(d));
+    *a++ = n;
+  }
 }
 
 /* { dg-final { scan-assembler-times {\mpli\M} 3 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr87870.c b/gcc/testsuite/gcc.target/powerpc/pr87870.c
index d2108ac3386..5fee06744ae 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr87870.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr87870.c
@@ -25,4 +25,7 @@ test3 (void)
   return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad;
 }
 
-/* { dg-final { scan-assembler-not {\mld\M} } } */
+/* test3 using "ld" to load the value for r3 and r4.
+   test0, test1 and test2 are using "li".  */
+/* { dg-final { scan-assembler-times {\mp?ld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mli\M} 6 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
index a07ff764bbf..ef0f8fabcc6 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
@@ -11,4 +11,6 @@ unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
 unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
 
 /* { dg-final { scan-assembler-times {\mpli\M} 4 { target has_arch_pwr10 }} } */
-/* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 7 { target has_arch_pwr10 } } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 3 { target { ! has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mld\M} 4 { target { ! has_arch_pwr10 } } } } */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns
  2023-11-15  3:02 [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo
  2023-11-15  3:02 ` [PATCH V2 2/3] Using pli to split 34bits constant Jiufu Guo
  2023-11-15  3:02 ` [PATCH V2 3/3] split complicate constant to memory Jiufu Guo
@ 2023-11-22  9:12 ` Kewen.Lin
  2023-11-27  2:59   ` Jiufu Guo
  2 siblings, 1 reply; 7+ messages in thread
From: Kewen.Lin @ 2023-11-22  9:12 UTC (permalink / raw)
  To: Jiufu Guo; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches

Hi,

on 2023/11/15 11:02, Jiufu Guo wrote:
> Hi,
> 
> Trunk gcc supports more constants to be built via two instructions: e.g.
> "li/lis; xori/xoris/rldicl/rldicr/rldic".
> And then num_insns_constant should also be updated.
> 
> Function "rs6000_emit_set_long_const" is used to build complicated
> constants; and "num_insns_constant_gpr" is used to compute "how
> many instructions are needed" to build the constant. So, these
> two functions should be aligned.
> 
> The idea is: reuse "rs6000_emit_set_long_const" to compute/record
> the number of instructions (when computing the instruction count,
> no instructions are emitted).
> 
> Compared with the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634195.html
> This version adds an argument to "rs6000_emit_set_long_const" to
> indicate computing the instruction number instead of emitting instructions.
> 
> Bootstrap & regtest pass ppc64{,le}.
> Is this ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new 
> 	parameter to record number of instructions to build the constant.
> 	(num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute
> 	num_insn.
> 	(ADJUST_INSN_NUM_AND_RET): New macro.
> 	(rs6000_emit_set_const): Call rs6000_emit_set_long_const with NULL
> 	argument.
> 
> ---
>  gcc/config/rs6000/rs6000.cc | 245 +++++++++++++++++++-----------------
>  1 file changed, 133 insertions(+), 112 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index cc24dd5301e..ba40dd6eee4 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
> -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
> +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int *);

Make the new argument default to nullptr...

>  static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
>  static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
>  static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
> @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
> 
>    else if (TARGET_POWERPC64)
>      {
> -      HOST_WIDE_INT low = sext_hwi (value, 32);
> -      HOST_WIDE_INT high = value >> 31;
> -
> -      if (high == 0 || high == -1)
> -	return 2;
> -
> -      high >>= 1;
> -
> -      if (low == 0 || low == high)
> -	return num_insns_constant_gpr (high) + 1;
> -      else if (high == 0)
> -	return num_insns_constant_gpr (low) + 1;
> -      else
> -	return (num_insns_constant_gpr (high)
> -		+ num_insns_constant_gpr (low) + 1);
> +      int num_insns = 0;
> +      rs6000_emit_set_long_const (NULL, value, &num_insns);
> +      return num_insns;
>      }
> 
>    else
> @@ -10284,7 +10272,7 @@ rs6000_emit_set_const (rtx dest, rtx source)
>  	  emit_move_insn (lo, GEN_INT (c));
>  	}
>        else
> -	rs6000_emit_set_long_const (dest, c);
> +	rs6000_emit_set_long_const (dest, c, NULL);

... then we don't need to change this line.

>        break;
> 
>      default:
> @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
> 
>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>     Output insns to set DEST equal to the constant C as a series of
> -   lis, ori and shl instructions.  */
> +   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
> +   only increase *NUM_INSNS as the number of insns, and do not output
> +   real insns.  */
> 
>  static void
> -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
> +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>  {
> -  rtx temp;
> -  int shift;
> -  HOST_WIDE_INT mask;
>    HOST_WIDE_INT ud1, ud2, ud3, ud4;
> 
>    ud1 = c & 0xffff;
> @@ -10509,41 +10496,71 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>    ud3 = (c >> 32) & 0xffff;
>    ud4 = (c >> 48) & 0xffff;
> 
> +  /* This macro RETURNs this function after increasing *NUM_INSNS!!!  */
> +#define ADJUST_INSN_NUM_AND_RET(N)                                             \
> +  if (num_insns)                                                               \
> +    {                                                                          \
> +      *num_insns += (N);                                                       \
> +      return;                                                                  \
> +    }

This macro and its uses below can still produce inconsistent counts, because
in some arms the count is pre-computed rather than derived from the insns
that are actually emitted.

Can we introduce a lambda function named count_or_emit_insn and use it to
replace all the current uses of emit_move_insn in this function?  Then each place
that emits an insn also does the counting, which avoids a pre-computed count
becoming out of date some day and no longer matching the emitted sequence.

BR,
Kewen

> +
>    if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
> -      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
> -    emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
> +      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
> +    {
> +      /* li */
> +      ADJUST_INSN_NUM_AND_RET (1);
> +      emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
> +      return;
> +    }
> +
> +  rtx temp = num_insns
> +	       ? NULL
> +	       : (!can_create_pseudo_p () ? dest : gen_reg_rtx (DImode));
> 
> -  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
> -	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
> +  if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
> +      || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
>      {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      /* lis[; ori] */
> +      ADJUST_INSN_NUM_AND_RET (ud1 != 0 ? 2 : 1);
> 
>        emit_move_insn (ud1 != 0 ? temp : dest,
>  		      GEN_INT (sext_hwi (ud2 << 16, 32)));
>        if (ud1 != 0)
>  	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +      return;
>      }
> -  else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
> +
> +  if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
>      {
>        /* lis; xoris */
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      ADJUST_INSN_NUM_AND_RET (2);
> +
>        emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
>        emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
> +      return;
>      }
> -  else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
> +
> +  if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
>      {
>        /* li; xoris */
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      ADJUST_INSN_NUM_AND_RET (2);
> +
>        emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
>        emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>  					 GEN_INT ((ud2 ^ 0xffff) << 16)));
> +      return;
>      }
> -  else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
> -	   || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
> -	   || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
> -	   || can_be_built_by_li_and_rldic (c, &shift, &mask))
> +
> +  int shift;
> +  HOST_WIDE_INT mask;
> +  if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
> +      || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
> +      || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
> +      || can_be_built_by_li_and_rldic (c, &shift, &mask))
>      {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      /* li/lis; rldicX */
> +      ADJUST_INSN_NUM_AND_RET (2);
> +
>        unsigned HOST_WIDE_INT imm = (c | ~mask);
>        imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
> 
> @@ -10553,49 +10570,48 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>        if (mask != HOST_WIDE_INT_M1)
>  	temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
>        emit_move_insn (dest, temp);
> +
> +      return;
>      }
> -  else if (ud3 == 0 && ud4 == 0)
> -    {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> 
> -      gcc_assert (ud2 & 0x8000);
> +  if (ud3 == 0 && ud4 == 0)
> +    {
> +      gcc_assert ((ud2 & 0x8000) && ud1 != 0);
> +      ADJUST_INSN_NUM_AND_RET (!(ud1 & 0x8000) ? 2 : 3);
> 
> -      if (ud1 == 0)
> -	{
> -	  /* lis; rldicl */
> -	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
> -	  emit_move_insn (dest,
> -			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
> -	}
> -      else if (!(ud1 & 0x8000))
> +      if (!(ud1 & 0x8000))
>  	{
>  	  /* li; oris */
>  	  emit_move_insn (temp, GEN_INT (ud1));
>  	  emit_move_insn (dest,
>  			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
> +	  return;
>  	}
> -      else
> -	{
> -	  /* lis; ori; rldicl */
> -	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
> -	  emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> -	  emit_move_insn (dest,
> -			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
> -	}
> +
> +      /* lis; ori; rldicl */
> +      emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
> +      emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +      emit_move_insn (dest, gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
> +      return;
>      }
> -  else if (ud1 == ud3 && ud2 == ud4)
> +
> +  if (ud1 == ud3 && ud2 == ud4)
>      {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>        HOST_WIDE_INT num = (ud2 << 16) | ud1;
> -      rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
> +      rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
> +
> +      /* Adjust(+1) insn number after half part is adjusted.  */
> +      ADJUST_INSN_NUM_AND_RET (1);
> +
>        rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
>        rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
>        emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
> +      return;
>      }
> -  else if ((ud4 == 0xffff && (ud3 & 0x8000))
> -	   || (ud4 == 0 && ! (ud3 & 0x8000)))
> +
> +  if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
>      {
> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +      ADJUST_INSN_NUM_AND_RET (2 + (ud2 != 0 ? 1 : 0) + (ud1 != 0 ? 1 : 0));
> 
>        emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
>        if (ud2 != 0)
> @@ -10604,73 +10620,78 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>  		      gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
>        if (ud1 != 0)
>  	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +      return;
>      }
> -  else if (TARGET_PREFIXED)
> +
> +  if (TARGET_PREFIXED)
>      {
>        if (can_create_pseudo_p ())
>  	{
> -	  /* pli A,L + pli B,H + rldimi A,B,32,0.  */
> -	  temp = gen_reg_rtx (DImode);
> +	  /* pli A,L; pli B,H; rldimi A,B,32,0.  */
> +	  ADJUST_INSN_NUM_AND_RET (3);
> +
>  	  rtx temp1 = gen_reg_rtx (DImode);
>  	  emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
>  	  emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
> -
>  	  emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
>  					   GEN_INT (0xffffffff)));
> +	  return;
>  	}
> -      else
> -	{
> -	  /* pli A,H + sldi A,32 + paddi A,A,L.  */
> -	  emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
> 
> -	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
> +      /* There may be 1 insn inaccurate because of no info about dest.  */
> +      ADJUST_INSN_NUM_AND_RET (2 + (ud2 != 0 && ud1 != 0 ? 2 : 1));
> 
> -	  bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
> +      bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
> 
> -	  /* Use paddi for the low 32 bits.  */
> -	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
> -	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
> -						GEN_INT ((ud2 << 16) | ud1)));
> +      /* pli A,H; sldi A,32; paddi A,A,L.  */
> +      emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
> +      emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
> 
> -	  /* Use oris, ori for low 32 bits.  */
> -	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
> -	    emit_move_insn (dest,
> -			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> -	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> -	}
> +      /* Use paddi for the low 32 bits.  */
> +      if (ud2 != 0 && ud1 != 0 && can_use_paddi)
> +	emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
> +					    GEN_INT ((ud2 << 16) | ud1)));
> +      /* Use oris, ori for low 32 bits.  */
> +      if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
> +	emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> +      if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
> +	emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> +      return;
>      }
> -  else
> -    {
> -      if (can_create_pseudo_p ())
> -	{
> -	  /* lis HIGH,UD4 ; ori HIGH,UD3 ;
> -	     lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
> -	  rtx high = gen_reg_rtx (DImode);
> -	  rtx low = gen_reg_rtx (DImode);
> -	  HOST_WIDE_INT num = (ud2 << 16) | ud1;
> -	  rs6000_emit_set_long_const (low, sext_hwi (num, 32));
> -	  num = (ud4 << 16) | ud3;
> -	  rs6000_emit_set_long_const (high, sext_hwi (num, 32));
> -	  emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
> -					   GEN_INT (0xffffffff)));
> -	}
> -      else
> -	{
> -	  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
> -	     oris DEST,UD2 ; ori DEST,UD1.  */
> -	  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
> -	  if (ud3 != 0)
> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
> 
> -	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
> -	  if (ud2 != 0)
> -	    emit_move_insn (dest,
> -			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> -	  if (ud1 != 0)
> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> -	}
> +  if (can_create_pseudo_p ())
> +    {
> +      /* lis HIGH,UD4 ; ori HIGH,UD3 ;
> +	 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
> +      rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
> +      rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
> +      HOST_WIDE_INT num = (ud2 << 16) | ud1;
> +      rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
> +      num = (ud4 << 16) | ud3;
> +      rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
> +
> +      /* Adjust(+1) insn number after 'high' and 'low' parts are adjusted.  */
> +      ADJUST_INSN_NUM_AND_RET (1);
> +      emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
> +				       GEN_INT (0xffffffff)));
> +      return;
>      }
> +
> +  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
> +     oris DEST,UD2 ; ori DEST,UD1.  */
> +  ADJUST_INSN_NUM_AND_RET (2 + (ud3 != 0 ? 1 : 0) + (ud2 != 0 ? 1 : 0)
> +			   + (ud1 != 0 ? 1 : 0));
> +  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
> +  if (ud3 != 0)
> +    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
> +
> +  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
> +  if (ud2 != 0)
> +    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> +  if (ud1 != 0)
> +    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> +
> +  return;
>  }
> 
>  /* Helper for the following.  Get rid of [r+r] memory refs


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 2/3] Using pli to split 34bits constant
  2023-11-15  3:02 ` [PATCH V2 2/3] Using pli to split 34bits constant Jiufu Guo
@ 2023-11-22  9:18   ` Kewen.Lin
  2023-11-27  2:49     ` Jiufu Guo
  0 siblings, 1 reply; 7+ messages in thread
From: Kewen.Lin @ 2023-11-22  9:18 UTC (permalink / raw)
  To: Jiufu Guo; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches

Hi,

on 2023/11/15 11:02, Jiufu Guo wrote:
> Hi,
> 
> For constants with 16bit values, 'li or lis' can be used to generate
> the value.  For 34bit constant, 'pli' is ok to generate the value.
> For example: 0x6666666666666666ULL, "pli 3,1717986918; rldimi 3,3,32,0"
> can be used.

emit_move_insn already uses pli for a 34-bit constant, so it's not obvious
to readers why we want this change.  I think you should state the reason
here explicitly: rs6000_emit_set_long_const can call itself recursively
without going through emit_move_insn, which can result in a sub-optimal
constant build ...
For testing, I'd prefer a dedicated test case, e.g. extracting function
msk66 from pr93012.c and checking that its generated assembly has pli but
not lis and ori on Power10 and up.

The others look good to me.  Thanks!

BR,
Kewen

> 
> Compared with the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634196.html
> This version updates a testcase to cover this functionality.
> 
> Bootstrap&regtest pass on ppc64{,le}.
> Is this ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use
> 	pli for 34bit constant.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/powerpc/pr93012.c: Update to check pli.
> 
> ---
>  gcc/config/rs6000/rs6000.cc                | 9 +++++++++
>  gcc/testsuite/gcc.target/powerpc/pr93012.c | 1 +
>  2 files changed, 10 insertions(+)
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index ba40dd6eee4..b277c52687b 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10504,6 +10504,15 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>        return;                                                                  \
>      }
> 
> +  if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
> +    {
> +      /* li/lis/pli */
> +      ADJUST_INSN_NUM_AND_RET (1);
> +
> +      emit_move_insn (dest, GEN_INT (c));
> +      return;
> +    }
> +
>    if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>        || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
>      {
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> index 4f764d0576f..a07ff764bbf 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
> @@ -10,4 +10,5 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
> 
> +/* { dg-final { scan-assembler-times {\mpli\M} 4 { target has_arch_pwr10 }} } */
>  /* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 2/3] Using pli to split 34bits constant
  2023-11-22  9:18   ` Kewen.Lin
@ 2023-11-27  2:49     ` Jiufu Guo
  0 siblings, 0 replies; 7+ messages in thread
From: Jiufu Guo @ 2023-11-27  2:49 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches


Hi,

"Kewen.Lin" <linkw@linux.ibm.com> writes:

> Hi,
>
> on 2023/11/15 11:02, Jiufu Guo wrote:
>> Hi,
>> 
>> For constants with 16bit values, 'li or lis' can be used to generate
>> the value.  For 34bit constant, 'pli' is ok to generate the value.
>> For example: 0x6666666666666666ULL, "pli 3,1717986918; rldimi 3,3,32,0"
>> can be used.
>
> emit_move_insn already uses pli for a 34-bit constant, so it's not obvious
> to readers why we want this change.  I think you should state the reason
> here explicitly: rs6000_emit_set_long_const can call itself recursively
> without going through emit_move_insn, which can result in a sub-optimal
> constant build ...
> For testing, I'd prefer a dedicated test case, e.g. extracting function
> msk66 from pr93012.c and checking that its generated assembly has pli but
> not lis and ori on Power10 and up.

I will update the message to make it clear.
Thanks so much for your suggestions!

BR,
Jeff (Jiufu Guo)


>
> The others look good to me.  Thanks!
>
> BR,
> Kewen
>
>> 
>> Compared with the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634196.html
>> This version updates a testcase to cover this functionality.
>> 
>> Bootstrap&regtest pass on ppc64{,le}.
>> Is this ok for trunk?
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>> gcc/ChangeLog:
>> 
>> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use
>> 	pli for 34bit constant.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	* gcc.target/powerpc/pr93012.c: Update to check pli.
>> 
>> ---
>>  gcc/config/rs6000/rs6000.cc                | 9 +++++++++
>>  gcc/testsuite/gcc.target/powerpc/pr93012.c | 1 +
>>  2 files changed, 10 insertions(+)
>> 
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index ba40dd6eee4..b277c52687b 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10504,6 +10504,15 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>        return;                                                                  \
>>      }
>> 
>> +  if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
>> +    {
>> +      /* li/lis/pli */
>> +      ADJUST_INSN_NUM_AND_RET (1);
>> +
>> +      emit_move_insn (dest, GEN_INT (c));
>> +      return;
>> +    }
>> +
>>    if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>>        || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
>>      {
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93012.c b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> index 4f764d0576f..a07ff764bbf 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr93012.c
>> @@ -10,4 +10,5 @@ unsigned long long mskh1() { return 0xffff9234ffff9234ULL; }
>>  unsigned long long mskl1() { return 0x2bcdffff2bcdffffULL; }
>>  unsigned long long mskse() { return 0xffff1234ffff1234ULL; }
>> 
>> +/* { dg-final { scan-assembler-times {\mpli\M} 4 { target has_arch_pwr10 }} } */
>>  /* { dg-final { scan-assembler-times {\mrldimi\M} 7 } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns
  2023-11-22  9:12 ` [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin
@ 2023-11-27  2:59   ` Jiufu Guo
  0 siblings, 0 replies; 7+ messages in thread
From: Jiufu Guo @ 2023-11-27  2:59 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: segher, dje.gcc, linkw, bergner, gcc-patches


Hi,

"Kewen.Lin" <linkw@linux.ibm.com> writes:

> Hi,
>
> on 2023/11/15 11:02, Jiufu Guo wrote:
>> Hi,
>> 
>> Trunk gcc supports more constants to be built via two instructions: e.g.
>> "li/lis; xori/xoris/rldicl/rldicr/rldic".
>> And then num_insns_constant should also be updated.
>> 
>> Function "rs6000_emit_set_long_const" is used to build complicated
>> constants; and "num_insns_constant_gpr" is used to compute "how
>> many instructions are needed" to build the constant. So, these
>> two functions should be aligned.
>> 
>> The idea is: reuse "rs6000_emit_set_long_const" to compute/record
>> the number of instructions (when computing the instruction count,
>> no instructions are emitted).
>> 
>> Compared with the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634195.html
>> This version adds an argument to "rs6000_emit_set_long_const" to
>> indicate computing the instruction number instead of emitting instructions.
>> 
>> Bootstrap & regtest pass ppc64{,le}.
>> Is this ok for trunk?
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>> gcc/ChangeLog:
>> 
>> 	* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new 
>> 	parameter to record number of instructions to build the constant.
>> 	(num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute
>> 	num_insn.
>> 	(ADJUST_INSN_NUM_AND_RET): New macro.
>> 	(rs6000_emit_set_const): Call rs6000_emit_set_long_const with NULL
>> 	argument.
>> 
>> ---
>>  gcc/config/rs6000/rs6000.cc | 245 +++++++++++++++++++-----------------
>>  1 file changed, 133 insertions(+), 112 deletions(-)
>> 
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index cc24dd5301e..ba40dd6eee4 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
>>  static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
>>  static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
>>  static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
>> -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
>> +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int *);
>
> Make the new argument default to nullptr...
>
>>  static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
>>  static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
>>  static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
>> @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
>> 
>>    else if (TARGET_POWERPC64)
>>      {
>> -      HOST_WIDE_INT low = sext_hwi (value, 32);
>> -      HOST_WIDE_INT high = value >> 31;
>> -
>> -      if (high == 0 || high == -1)
>> -	return 2;
>> -
>> -      high >>= 1;
>> -
>> -      if (low == 0 || low == high)
>> -	return num_insns_constant_gpr (high) + 1;
>> -      else if (high == 0)
>> -	return num_insns_constant_gpr (low) + 1;
>> -      else
>> -	return (num_insns_constant_gpr (high)
>> -		+ num_insns_constant_gpr (low) + 1);
>> +      int num_insns = 0;
>> +      rs6000_emit_set_long_const (NULL, value, &num_insns);
>> +      return num_insns;
>>      }
>> 
>>    else
>> @@ -10284,7 +10272,7 @@ rs6000_emit_set_const (rtx dest, rtx source)
>>  	  emit_move_insn (lo, GEN_INT (c));
>>  	}
>>        else
>> -	rs6000_emit_set_long_const (dest, c);
>> +	rs6000_emit_set_long_const (dest, c, NULL);
>
> ... then we don't need to change this line.
Yes, thanks.
>
>>        break;
>> 
>>      default:
>> @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
>> 
>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>>     Output insns to set DEST equal to the constant C as a series of
>> -   lis, ori and shl instructions.  */
>> +   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
>> +   only increase *NUM_INSNS as the number of insns, and do not output
>> +   real insns.  */
>> 
>>  static void
>> -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>> +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>  {
>> -  rtx temp;
>> -  int shift;
>> -  HOST_WIDE_INT mask;
>>    HOST_WIDE_INT ud1, ud2, ud3, ud4;
>> 
>>    ud1 = c & 0xffff;
>> @@ -10509,41 +10496,71 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>>    ud3 = (c >> 32) & 0xffff;
>>    ud4 = (c >> 48) & 0xffff;
>> 
>> +  /* This macro RETURNs this function after increasing *NUM_INSNS!!!  */
>> +#define ADJUST_INSN_NUM_AND_RET(N)                                             \
>> +  if (num_insns)                                                               \
>> +    {                                                                          \
>> +      *num_insns += (N);                                                       \
>> +      return;                                                                  \
>> +    }
>
> This macro and its uses below can still produce inconsistent counts, because
> in some arms the count is pre-computed rather than derived from the insns
> that are actually emitted.
>
> Can we introduce a lambda function named count_or_emit_insn and use it to
> replace all the current uses of emit_move_insn in this function?  Then each place
> that emits an insn also does the counting, which avoids a pre-computed count
> becoming out of date some day and no longer matching the emitted sequence.

Agreed, "count_or_emit_insn" is a good idea.
I will update the patch accordingly.

BR,
Jeff (Jiufu Guo)

>
> BR,
> Kewen
>
>> +
>>    if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>> -      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
>> -    emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
>> +      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
>> +    {
>> +      /* li */
>> +      ADJUST_INSN_NUM_AND_RET (1);
>> +      emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
>> +      return;
>> +    }
>> +
>> +  rtx temp = num_insns
>> +	       ? NULL
>> +	       : (!can_create_pseudo_p () ? dest : gen_reg_rtx (DImode));
>> 
>> -  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
>> -	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
>> +  if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
>> +      || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
>>      {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +      /* lis[; ori] */
>> +      ADJUST_INSN_NUM_AND_RET (ud1 != 0 ? 2 : 1);
>> 
>>        emit_move_insn (ud1 != 0 ? temp : dest,
>>  		      GEN_INT (sext_hwi (ud2 << 16, 32)));
>>        if (ud1 != 0)
>>  	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +      return;
>>      }
>> -  else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
>> +
>> +  if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
>>      {
>>        /* lis; xoris */
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +      ADJUST_INSN_NUM_AND_RET (2);
>> +
>>        emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
>>        emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
>> +      return;
>>      }
>> -  else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
>> +
>> +  if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
>>      {
>>        /* li; xoris */
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +      ADJUST_INSN_NUM_AND_RET (2);
>> +
>>        emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
>>        emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>>  					 GEN_INT ((ud2 ^ 0xffff) << 16)));
>> +      return;
>>      }
>> -  else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
>> -	   || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
>> -	   || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
>> -	   || can_be_built_by_li_and_rldic (c, &shift, &mask))
>> +
>> +  int shift;
>> +  HOST_WIDE_INT mask;
>> +  if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
>> +      || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
>> +      || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
>> +      || can_be_built_by_li_and_rldic (c, &shift, &mask))
>>      {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +      /* li/lis; rldicX */
>> +      ADJUST_INSN_NUM_AND_RET (2);
>> +
>>        unsigned HOST_WIDE_INT imm = (c | ~mask);
>>        imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
>> 
>> @@ -10553,49 +10570,48 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>>        if (mask != HOST_WIDE_INT_M1)
>>  	temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
>>        emit_move_insn (dest, temp);
>> +
>> +      return;
>>      }
>> -  else if (ud3 == 0 && ud4 == 0)
>> -    {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> 
>> -      gcc_assert (ud2 & 0x8000);
>> +  if (ud3 == 0 && ud4 == 0)
>> +    {
>> +      gcc_assert ((ud2 & 0x8000) && ud1 != 0);
>> +      ADJUST_INSN_NUM_AND_RET (!(ud1 & 0x8000) ? 2 : 3);
>> 
>> -      if (ud1 == 0)
>> -	{
>> -	  /* lis; rldicl */
>> -	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
>> -	  emit_move_insn (dest,
>> -			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
>> -	}
>> -      else if (!(ud1 & 0x8000))
>> +      if (!(ud1 & 0x8000))
>>  	{
>>  	  /* li; oris */
>>  	  emit_move_insn (temp, GEN_INT (ud1));
>>  	  emit_move_insn (dest,
>>  			  gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
>> +	  return;
>>  	}
>> -      else
>> -	{
>> -	  /* lis; ori; rldicl */
>> -	  emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
>> -	  emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> -	  emit_move_insn (dest,
>> -			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
>> -	}
>> +
>> +      /* lis; ori; rldicl */
>> +      emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
>> +      emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +      emit_move_insn (dest, gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
>> +      return;
>>      }
>> -  else if (ud1 == ud3 && ud2 == ud4)
>> +
>> +  if (ud1 == ud3 && ud2 == ud4)
>>      {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>>        HOST_WIDE_INT num = (ud2 << 16) | ud1;
>> -      rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
>> +      rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
>> +
>> +      /* Add 1 to the insn count after the half part has been counted.  */
>> +      ADJUST_INSN_NUM_AND_RET (1);
>> +
>>        rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
>>        rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
>>        emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
>> +      return;
>>      }
>> -  else if ((ud4 == 0xffff && (ud3 & 0x8000))
>> -	   || (ud4 == 0 && ! (ud3 & 0x8000)))
>> +
>> +  if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
>>      {
>> -      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
>> +      ADJUST_INSN_NUM_AND_RET (2 + (ud2 != 0 ? 1 : 0) + (ud1 != 0 ? 1 : 0));
>> 
>>        emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
>>        if (ud2 != 0)
>> @@ -10604,73 +10620,78 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>>  		      gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
>>        if (ud1 != 0)
>>  	emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>> +      return;
>>      }
>> -  else if (TARGET_PREFIXED)
>> +
>> +  if (TARGET_PREFIXED)
>>      {
>>        if (can_create_pseudo_p ())
>>  	{
>> -	  /* pli A,L + pli B,H + rldimi A,B,32,0.  */
>> -	  temp = gen_reg_rtx (DImode);
>> +	  /* pli A,L; pli B,H; rldimi A,B,32,0.  */
>> +	  ADJUST_INSN_NUM_AND_RET (3);
>> +
>>  	  rtx temp1 = gen_reg_rtx (DImode);
>>  	  emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
>>  	  emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
>> -
>>  	  emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
>>  					   GEN_INT (0xffffffff)));
>> +	  return;
>>  	}
>> -      else
>> -	{
>> -	  /* pli A,H + sldi A,32 + paddi A,A,L.  */
>> -	  emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
>> 
>> -	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>> +      /* The count may be off by 1 insn since nothing is known about dest.  */
>> +      ADJUST_INSN_NUM_AND_RET (2 + (ud2 != 0 && ud1 != 0 ? 2 : 1));
>> 
>> -	  bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
>> +      bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
>> 
>> -	  /* Use paddi for the low 32 bits.  */
>> -	  if (ud2 != 0 && ud1 != 0 && can_use_paddi)
>> -	    emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
>> -						GEN_INT ((ud2 << 16) | ud1)));
>> +      /* pli A,H; sldi A,32; paddi A,A,L.  */
>> +      emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
>> +      emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>> 
>> -	  /* Use oris, ori for low 32 bits.  */
>> -	  if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
>> -	    emit_move_insn (dest,
>> -			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
>> -	  if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
>> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> -	}
>> +      /* Use paddi for the low 32 bits.  */
>> +      if (ud2 != 0 && ud1 != 0 && can_use_paddi)
>> +	emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
>> +					    GEN_INT ((ud2 << 16) | ud1)));
>> +      /* Use oris, ori for low 32 bits.  */
>> +      if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
>> +	emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
>> +      if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
>> +	emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> +      return;
>>      }
>> -  else
>> -    {
>> -      if (can_create_pseudo_p ())
>> -	{
>> -	  /* lis HIGH,UD4 ; ori HIGH,UD3 ;
>> -	     lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
>> -	  rtx high = gen_reg_rtx (DImode);
>> -	  rtx low = gen_reg_rtx (DImode);
>> -	  HOST_WIDE_INT num = (ud2 << 16) | ud1;
>> -	  rs6000_emit_set_long_const (low, sext_hwi (num, 32));
>> -	  num = (ud4 << 16) | ud3;
>> -	  rs6000_emit_set_long_const (high, sext_hwi (num, 32));
>> -	  emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
>> -					   GEN_INT (0xffffffff)));
>> -	}
>> -      else
>> -	{
>> -	  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
>> -	     oris DEST,UD2 ; ori DEST,UD1.  */
>> -	  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
>> -	  if (ud3 != 0)
>> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
>> 
>> -	  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>> -	  if (ud2 != 0)
>> -	    emit_move_insn (dest,
>> -			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
>> -	  if (ud1 != 0)
>> -	    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> -	}
>> +  if (can_create_pseudo_p ())
>> +    {
>> +      /* lis HIGH,UD4 ; ori HIGH,UD3 ;
>> +	 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
>> +      rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
>> +      rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
>> +      HOST_WIDE_INT num = (ud2 << 16) | ud1;
>> +      rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
>> +      num = (ud4 << 16) | ud3;
>> +      rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
>> +
>> +      /* Add 1 to the insn count once 'high' and 'low' have been counted.  */
>> +      ADJUST_INSN_NUM_AND_RET (1);
>> +      emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
>> +				       GEN_INT (0xffffffff)));
>> +      return;
>>      }
>> +
>> +  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
>> +     oris DEST,UD2 ; ori DEST,UD1.  */
>> +  ADJUST_INSN_NUM_AND_RET (2 + (ud3 != 0 ? 1 : 0) + (ud2 != 0 ? 1 : 0)
>> +			   + (ud1 != 0 ? 1 : 0));
>> +  emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
>> +  if (ud3 != 0)
>> +    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
>> +
>> +  emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
>> +  if (ud2 != 0)
>> +    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
>> +  if (ud1 != 0)
>> +    emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
>> +
>> +  return;
>>  }
>> 
>>  /* Helper for the following.  Get rid of [r+r] memory refs

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-11-27  2:59 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-15  3:02 [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns Jiufu Guo
2023-11-15  3:02 ` [PATCH V2 2/3] Using pli to split 34bits constant Jiufu Guo
2023-11-22  9:18   ` Kewen.Lin
2023-11-27  2:49     ` Jiufu Guo
2023-11-15  3:02 ` [PATCH V2 3/3] split complicate constant to memory Jiufu Guo
2023-11-22  9:12 ` [PATCH V2 1/3]rs6000: update num_insns_constant for 2 insns Kewen.Lin
2023-11-27  2:59   ` Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).