public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH,rs6000] Add patterns for combine to support p10 fusion
@ 2020-10-26 21:44 acsawdey
  2020-11-04 18:12 ` Aaron Sawdey
  0 siblings, 1 reply; 3+ messages in thread
From: acsawdey @ 2020-10-26 21:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, wschmidt, Aaron Sawdey

From: Aaron Sawdey <acsawdey@linux.ibm.com>

This patch adds the first couple patterns to support p10 fusion. These
will allow combine to create a single insn for a pair of instructions
that power10 can fuse and execute. These particular ones have the
requirement that only cr0 can be used when fusing a load with a compare
immediate of -1/0/1, so we want combine to put that requirement in, and
if it doesn't work out later the splitter can get used.

This also adds option -mpower10-fusion which defaults on for power10 and
will gate all these fusion patterns. In addition I have added an
undocumented option -mpower10-fusion-ld-cmpi (which may be removed later)
that just controls the load+compare-immediate patterns. I have made
these default to on for power10 but they are not disallowed for earlier
processors because it is still valid code. This allows us to test the
correctness of fusion code generation by turning it on explicitly.

The intention is to work through more patterns of this style to support
the rest of the power10 fusion pairs.

Bootstrap and regtest look good on ppc64le power9 with these patterns
enabled in stage2/stage3 and for regtest. Ok for trunk?

gcc/ChangeLog:

	* config/rs6000/predicates.md: Add const_m1_to_1_operand.
	* config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and
	OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER.
	* config/rs6000/rs6000-protos.h (address_ok_for_form): Add
	prototype.
	* config/rs6000/rs6000.c (rs6000_option_override_internal):
	Automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi
	if target is power10.  (rs6000_opt_masks): Allow -mpower10-fusion
	in function attributes.  (address_ok_for_form): New function.
	* config/rs6000/rs6000.h: Add MASK_P10_FUSION.
	* config/rs6000/rs6000.md (*ld_cmpi_cr0): New
	define_insn_and_split.
	(*lwa_cmpdi_cr0): New define_insn_and_split.
	(*lwa_cmpwi_cr0): New define_insn_and_split.
	* config/rs6000/rs6000.opt: Add -mpower10-fusion
	and -mpower10-fusion-ld-cmpi.
---
 gcc/config/rs6000/predicates.md   |  5 +++
 gcc/config/rs6000/rs6000-cpus.def |  6 ++-
 gcc/config/rs6000/rs6000-protos.h |  2 +
 gcc/config/rs6000/rs6000.c        | 34 ++++++++++++++++
 gcc/config/rs6000/rs6000.h        |  1 +
 gcc/config/rs6000/rs6000.md       | 68 +++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000.opt      |  8 ++++
 7 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..b75c1ddfb69 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 1)")))
 
+;; Match op = -1, op = 0, or op = 1.
+(define_predicate "const_m1_to_1_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -1, 1)")))
+
 ;; Match op = 0..3.
 (define_predicate "const_0_to_3_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 8d2c1ffd6cf..3e65289d8df 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -82,7 +82,9 @@
 
 #define ISA_3_1_MASKS_SERVER	(ISA_3_0_MASKS_SERVER			\
 				 | OPTION_MASK_POWER10			\
-				 | OTHER_POWER10_MASKS)
+				 | OTHER_POWER10_MASKS			\
+				 | OPTION_MASK_P10_FUSION		\
+				 | OPTION_MASK_P10_FUSION_LD_CMPI)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
@@ -129,6 +131,8 @@
 				 | OPTION_MASK_FLOAT128_KEYWORD		\
 				 | OPTION_MASK_FPRND			\
 				 | OPTION_MASK_POWER10			\
+				 | OPTION_MASK_P10_FUSION		\
+				 | OPTION_MASK_P10_FUSION_LD_CMPI	\
 				 | OPTION_MASK_HTM			\
 				 | OPTION_MASK_ISEL			\
 				 | OPTION_MASK_MFCRF			\
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 25fa5dd57cd..d8a344245e6 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -190,6 +190,8 @@ enum non_prefixed_form {
 
 extern enum insn_form address_to_insn_form (rtx, machine_mode,
 					    enum non_prefixed_form);
+extern bool address_ok_for_form (rtx, machine_mode,
+				 enum non_prefixed_form);
 extern bool prefixed_load_p (rtx_insn *);
 extern bool prefixed_store_p (rtx_insn *);
 extern bool prefixed_paddi_p (rtx_insn *);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4d528a39a37..b8de318a0bc 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4406,6 +4406,12 @@ rs6000_option_override_internal (bool global_init_p)
   if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
     rs6000_isa_flags |= OPTION_MASK_MMA;
 
+  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
+    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
+
+  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
+    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
+
   /* Turn off vector pair/mma options on non-power10 systems.  */
   else if (!TARGET_POWER10 && TARGET_MMA)
     {
@@ -23391,6 +23397,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
   { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
   { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true  },
+  { "power10-fusion",		OPTION_MASK_P10_FUSION,		false, true  },
   { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
   { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
   { "prefixed",			OPTION_MASK_PREFIXED,		false, true  },
@@ -25482,6 +25489,33 @@ address_to_insn_form (rtx addr,
   return INSN_FORM_BAD;
 }
 
+bool
+address_ok_for_form (rtx addr,
+		     machine_mode mode,
+		     enum non_prefixed_form non_prefixed_format)
+{
+  enum insn_form result_form;
+
+  result_form = address_to_insn_form (addr, mode, non_prefixed_format);
+
+  switch (non_prefixed_format)
+    {
+    case NON_PREFIXED_DS:
+      switch (result_form)
+	{
+	case INSN_FORM_DS:
+	case INSN_FORM_BASE_REG:
+	  return true;
+	default:
+	  break;
+	}
+      break;
+    default:
+      break;
+    }
+  return false;
+}
+
 /* Helper function to see if we're potentially looking at lfs/stfs.
    - PARALLEL containing a SET and a CLOBBER
    - stfs:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..884452fc6d9 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -539,6 +539,7 @@ extern int rs6000_vector_align[];
 #define MASK_UPDATE			OPTION_MASK_UPDATE
 #define MASK_VSX			OPTION_MASK_VSX
 #define MASK_POWER10			OPTION_MASK_POWER10
+#define MASK_P10_FUSION			OPTION_MASK_P10_FUSION
 
 #ifndef IN_LIBGCC2
 #define MASK_POWERPC64			OPTION_MASK_POWERPC64
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..bbcc6abe0f9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1896,6 +1896,74 @@ (define_insn_and_split "*add<mode>3_imm_dot2"
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
+;; Define an insn for ld+cmpi so we can force it to use CR0 on p10
+;; immediate has to be -1/0/1
+(define_insn_and_split "*ld_cmpi_cr0"
+  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+	(compare:CC (match_operand:DI 1 "memory_operand" "m")
+		    (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
+   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(match_dup 1))
+   ]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
+  "ld %0,%1\;cmpi 0,1,%0,%3"
+  "&& reload_completed
+   && (cc_reg_not_cr0_operand (operands[2], CCmode)
+       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2)
+        (compare:CC (match_dup 0)
+		    (match_dup 3)))]
+  ""
+  [(set_attr "type" "load")
+   (set_attr "length" "8")])
+
+;; Define an insn for lwa+cmpdi so we can force it to use CR0 on p10
+;; immediate is -1/0/1
+(define_insn_and_split "*lwa_cmpdi_cr0"
+  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+	(compare:CC (sign_extend:DI (match_operand:SI 1 "memory_operand" "m"))
+		    (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
+   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(sign_extend:DI (match_dup 1)))
+   ]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
+  "lwa %0,%1\;cmpdi %0,%3"
+  "&& reload_completed
+   && (cc_reg_not_cr0_operand (operands[2], CCmode)
+       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
+  [(set (match_dup 0)
+	(sign_extend:DI (match_dup 1)))
+   (set (match_dup 2)
+        (compare:CC (match_dup 0)
+		    (match_dup 3)))]
+  ""
+  [(set_attr "type" "load")
+   (set_attr "length" "8")])
+
+;; Define an insn for lwa+cmpwi so we can force it to use CR0 on p10
+;; immediate is -1/0/1
+(define_insn_and_split "*lwa_cmpwi_cr0"
+  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+	(compare:CC (match_operand:SI 1 "memory_operand" "m")
+		    (match_operand:SI 3 "const_m1_to_1_operand" "n")))
+   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(sign_extend:DI (match_dup 1)))
+   ]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
+  "lwa %0,%1\;cmpwi %0,%3"
+  "&& reload_completed
+   && (cc_reg_not_cr0_operand (operands[2], CCmode)
+       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
+  [(set (match_dup 0)
+	(sign_extend:DI (match_dup 1)))
+   (set (match_dup 2)
+        (compare:CC (match_dup 0)
+		    (match_dup 3)))]
+  ""
+  [(set_attr "type" "load")
+   (set_attr "length" "8")])
+
 ;; Split an add that we can't do in one insn into two insns, each of which
 ;; does one 16-bit part.  This is used by combine.  Note that the low-order
 ;; add should be last in case the result gets used in an address.
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index b2a70e88ca8..63457efb607 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -479,6 +479,14 @@ mpower8-vector
 Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
 Use vector and scalar instructions added in ISA 2.07.
 
+mpower10-fusion
+Target Report Mask(P10_FUSION) Var(rs6000_isa_flags)
+Fuse certain integer operations together for better performance on power10.
+
+mpower10-fusion-ld-cmpi
+Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags)
+Fuse certain integer operations together for better performance on power10.
+
 mcrypto
 Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
 Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
-- 
2.18.4


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH,rs6000] Add patterns for combine to support p10 fusion
  2020-10-26 21:44 [PATCH,rs6000] Add patterns for combine to support p10 fusion acsawdey
@ 2020-11-04 18:12 ` Aaron Sawdey
  2020-11-05 22:45   ` will schmidt
  0 siblings, 1 reply; 3+ messages in thread
From: Aaron Sawdey @ 2020-11-04 18:12 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, wschmidt

Ping.

Aaron Sawdey, Ph.D. sawdey@linux.ibm.com
IBM Linux on POWER Toolchain
 

> On Oct 26, 2020, at 4:44 PM, acsawdey@linux.ibm.com wrote:
> 
> From: Aaron Sawdey <acsawdey@linux.ibm.com>
> 
> This patch adds the first couple patterns to support p10 fusion. These
> will allow combine to create a single insn for a pair of instructions
> that that power10 can fuse and execute. These particular ones have the
> requirement that only cr0 can be used when fusing a load with a compare
> immediate of -1/0/1, so we want combine to put that requirement in, and
> if it doesn't work out later the splitter can get used.
> 
> This also adds option -mpower10-fusion which defaults on for power10 and
> will gate all these fusion patterns. In addition I have added an
> undocumented option -mpower10-fusion-ld-cmpi (which may be removed later)
> that just controls the load+compare-immediate patterns. I have make
> these default on for power10 but they are not disallowed for earlier
> processors because it is still valid code. This allows us to test the
> correctness of fusion code generation by turning it on explicitly.
> 
> The intention is to work through more patterns of this style to support
> the rest of the power10 fusion pairs.
> 
> Bootstrap and regtest looks good on ppc64le power9 with these patterns
> enabled in stage2/stage3 and for regtest. Ok for trunk?
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/predicates.md: Add const_me_to_1_operand.
> 	* config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and
> 	OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER.
> 	* config/rs6000/rs6000-protos.h (address_ok_for_form): Add
> 	prototype.
> 	* config/rs6000/rs6000.c (rs6000_option_override_internal):
> 	automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi
> 	if target is power10.  (rs600_opt_masks): Allow -mpower10-fusion
> 	in function attributes.  (address_ok_for_form): New function.
> 	* config/rs6000/rs6000.h: Add MASK_P10_FUSION.
> 	* config/rs6000/rs6000.md (*ld_cmpi_cr0): New
> 	define_insn_and_split.
> 	(*lwa_cmpdi_cr0): New define_insn_and_split.
> 	(*lwa_cmpwi_cr0): New define_insn_and_split.
> 	* config/rs6000/rs6000.opt: Add -mpower10-fusion
> 	and -mpower10-fusion-ld-cmpi.
> ---
> gcc/config/rs6000/predicates.md   |  5 +++
> gcc/config/rs6000/rs6000-cpus.def |  6 ++-
> gcc/config/rs6000/rs6000-protos.h |  2 +
> gcc/config/rs6000/rs6000.c        | 34 ++++++++++++++++
> gcc/config/rs6000/rs6000.h        |  1 +
> gcc/config/rs6000/rs6000.md       | 68 +++++++++++++++++++++++++++++++
> gcc/config/rs6000/rs6000.opt      |  8 ++++
> 7 files changed, 123 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 4c2fe7fa312..b75c1ddfb69 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand"
>   (and (match_code "const_int")
>        (match_test "IN_RANGE (INTVAL (op), 0, 1)")))
> 
> +;; Match op = -1, op = 0, or op = 1.
> +(define_predicate "const_m1_to_1_operand"
> +  (and (match_code "const_int")
> +       (match_test "IN_RANGE (INTVAL (op), -1, 1)")))
> +
> ;; Match op = 0..3.
> (define_predicate "const_0_to_3_operand"
>   (and (match_code "const_int")
> diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
> index 8d2c1ffd6cf..3e65289d8df 100644
> --- a/gcc/config/rs6000/rs6000-cpus.def
> +++ b/gcc/config/rs6000/rs6000-cpus.def
> @@ -82,7 +82,9 @@
> 
> #define ISA_3_1_MASKS_SERVER	(ISA_3_0_MASKS_SERVER			\
> 				 | OPTION_MASK_POWER10			\
> -				 | OTHER_POWER10_MASKS)
> +				 | OTHER_POWER10_MASKS			\
> +				 | OPTION_MASK_P10_FUSION		\
> +				 | OPTION_MASK_P10_FUSION_LD_CMPI)
> 
> /* Flags that need to be turned off if -mno-power9-vector.  */
> #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
> @@ -129,6 +131,8 @@
> 				 | OPTION_MASK_FLOAT128_KEYWORD		\
> 				 | OPTION_MASK_FPRND			\
> 				 | OPTION_MASK_POWER10			\
> +				 | OPTION_MASK_P10_FUSION		\
> +				 | OPTION_MASK_P10_FUSION_LD_CMPI	\
> 				 | OPTION_MASK_HTM			\
> 				 | OPTION_MASK_ISEL			\
> 				 | OPTION_MASK_MFCRF			\
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 25fa5dd57cd..d8a344245e6 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -190,6 +190,8 @@ enum non_prefixed_form {
> 
> extern enum insn_form address_to_insn_form (rtx, machine_mode,
> 					    enum non_prefixed_form);
> +extern bool address_ok_for_form (rtx, machine_mode,
> +				 enum non_prefixed_form);
> extern bool prefixed_load_p (rtx_insn *);
> extern bool prefixed_store_p (rtx_insn *);
> extern bool prefixed_paddi_p (rtx_insn *);
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 4d528a39a37..b8de318a0bc 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4406,6 +4406,12 @@ rs6000_option_override_internal (bool global_init_p)
>   if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
>     rs6000_isa_flags |= OPTION_MASK_MMA;
> 
> +  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
> +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
> +
> +  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
> +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
> +
>   /* Turn off vector pair/mma options on non-power10 systems.  */
>   else if (!TARGET_POWER10 && TARGET_MMA)
>     {
> @@ -23391,6 +23397,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
>   { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
>   { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
>   { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true  },
> +  { "power10-fusion",		OPTION_MASK_P10_FUSION,		false, true  },
>   { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
>   { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
>   { "prefixed",			OPTION_MASK_PREFIXED,		false, true  },
> @@ -25482,6 +25489,33 @@ address_to_insn_form (rtx addr,
>   return INSN_FORM_BAD;
> }
> 
> +bool
> +address_ok_for_form (rtx addr,
> +		     machine_mode mode,
> +		     enum non_prefixed_form non_prefixed_format)
> +{
> +  enum insn_form result_form;
> +
> +  result_form = address_to_insn_form (addr, mode, non_prefixed_format);
> +
> +  switch (non_prefixed_format)
> +    {
> +    case NON_PREFIXED_DS:
> +      switch (result_form)
> +	{
> +	case INSN_FORM_DS:
> +	case INSN_FORM_BASE_REG:
> +	  return true;
> +	default:
> +	  break;
> +	}
> +      break;
> +    default:
> +      break;
> +    }
> +  return false;
> +}
> +
> /* Helper function to see if we're potentially looking at lfs/stfs.
>    - PARALLEL containing a SET and a CLOBBER
>    - stfs:
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index bbd8060e143..884452fc6d9 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -539,6 +539,7 @@ extern int rs6000_vector_align[];
> #define MASK_UPDATE			OPTION_MASK_UPDATE
> #define MASK_VSX			OPTION_MASK_VSX
> #define MASK_POWER10			OPTION_MASK_POWER10
> +#define MASK_P10_FUSION			OPTION_MASK_P10_FUSION
> 
> #ifndef IN_LIBGCC2
> #define MASK_POWERPC64			OPTION_MASK_POWERPC64
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index dc060143104..bbcc6abe0f9 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -1896,6 +1896,74 @@ (define_insn_and_split "*add<mode>3_imm_dot2"
>    (set_attr "dot" "yes")
>    (set_attr "length" "4,8")])
> 
> +;; Define an insn for ld+cmpi so we can force it to use CR0 on p10
> +;; immediate has to be -1/0/1
> +(define_insn_and_split "*ld_cmpi_cr0"
> +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +	(compare:CC (match_operand:DI 1 "memory_operand" "m")
> +		    (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
> +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +	(match_dup 1))
> +   ]
> +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> +  "ld %0,%1\;cmpi 0,1,%0,%3"
> +  "&& reload_completed
> +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> +       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
> +  [(set (match_dup 0) (match_dup 1))
> +   (set (match_dup 2)
> +        (compare:CC (match_dup 0)
> +		    (match_dup 3)))]
> +  ""
> +  [(set_attr "type" "load")
> +   (set_attr "length" "8")])
> +
> +;; Define an insn for lwa+cmpdi so we can force it to use CR0 on p10
> +;; immediate is -1/0/1
> +(define_insn_and_split "*lwa_cmpdi_cr0"
> +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +	(compare:CC (sign_extend:DI (match_operand:SI 1 "memory_operand" "m"))
> +		    (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
> +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +	(sign_extend:DI (match_dup 1)))
> +   ]
> +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> +  "lwa %0,%1\;cmpdi %0,%3"
> +  "&& reload_completed
> +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> +       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
> +  [(set (match_dup 0)
> +	(sign_extend:DI (match_dup 1)))
> +   (set (match_dup 2)
> +        (compare:CC (match_dup 0)
> +		    (match_dup 3)))]
> +  ""
> +  [(set_attr "type" "load")
> +   (set_attr "length" "8")])
> +
> +;; Define an insn for lwa+cmpwi so we can force it to use CR0 on p10
> +;; immediate is -1/0/1
> +(define_insn_and_split "*lwa_cmpwi_cr0"
> +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> +	(compare:CC (match_operand:SI 1 "memory_operand" "m")
> +		    (match_operand:SI 3 "const_m1_to_1_operand" "n")))
> +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +	(sign_extend:DI (match_dup 1)))
> +   ]
> +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> +  "lwa %0,%1\;cmpwi %0,%3"
> +  "&& reload_completed
> +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> +       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
> +  [(set (match_dup 0)
> +	(sign_extend:DI (match_dup 1)))
> +   (set (match_dup 2)
> +        (compare:CC (match_dup 0)
> +		    (match_dup 3)))]
> +  ""
> +  [(set_attr "type" "load")
> +   (set_attr "length" "8")])
> +
> ;; Split an add that we can't do in one insn into two insns, each of which
> ;; does one 16-bit part.  This is used by combine.  Note that the low-order
> ;; add should be last in case the result gets used in an address.
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index b2a70e88ca8..63457efb607 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -479,6 +479,14 @@ mpower8-vector
> Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
> Use vector and scalar instructions added in ISA 2.07.
> 
> +mpower10-fusion
> +Target Report Mask(P10_FUSION) Var(rs6000_isa_flags)
> +Fuse certain integer operations together for better performance on power10.
> +
> +mpower10-fusion-ld-cmpi
> +Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags)
> +Fuse certain integer operations together for better performance on power10.
> +
> mcrypto
> Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
> Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
> -- 
> 2.18.4
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH,rs6000] Add patterns for combine to support p10 fusion
  2020-11-04 18:12 ` Aaron Sawdey
@ 2020-11-05 22:45   ` will schmidt
  0 siblings, 0 replies; 3+ messages in thread
From: will schmidt @ 2020-11-05 22:45 UTC (permalink / raw)
  To: Aaron Sawdey, gcc-patches; +Cc: wschmidt, Segher Boessenkool

On Wed, 2020-11-04 at 12:12 -0600, Aaron Sawdey via Gcc-patches wrote:
> Ping.
> 
> Aaron Sawdey, Ph.D. sawdey@linux.ibm.com
> IBM Linux on POWER Toolchain
>  
> 
> > On Oct 26, 2020, at 4:44 PM, acsawdey@linux.ibm.com wrote:
> > 
> > From: Aaron Sawdey <acsawdey@linux.ibm.com>
> > 

Hi, 

> > This patch adds the first couple patterns to support p10 fusion. These
> > will allow combine to create a single insn for a pair of instructions
> > that that power10 can fuse and execute. These particular ones have the

that the power10

s/particular ones/particular insns/ 

> > requirement that only cr0 can be used when fusing a load with a compare
> > immediate of -1/0/1, so we want combine to put that requirement in, and
> > if it doesn't work out later the splitter can get used.
> > 
> > This also adds option -mpower10-fusion which defaults on for power10 and
> > will gate all these fusion patterns. In addition I have added an
> > undocumented option -mpower10-fusion-ld-cmpi (which may be removed later)
> > that just controls the load+compare-immediate patterns.

ok

> > 								 I have make

made

> > these default on for power10 but they are not disallowed for earlier

to on

> > processors because it is still valid code. This allows us to test the
> > correctness of fusion code generation by turning it on explicitly.
> > 
> > The intention is to work through more patterns of this style to support
> > the rest of the power10 fusion pairs.
> > 
> > Bootstrap and regtest looks good on ppc64le power9 with these patterns
> > enabled in stage2/stage3 and for regtest. Ok for trunk?
> > 
> > gcc/ChangeLog:
> > 
> > 	* config/rs6000/predicates.md: Add const_me_to_1_operand.
> > 	* config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and
> > 	OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER.

to ... and OTHER_P9_VECTOR_MASKS

> > 	* config/rs6000/rs6000-protos.h (address_ok_for_form): Add
> > 	prototype.



> > 	* config/rs6000/rs6000.c (rs6000_option_override_internal):
> > 	automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi
> > 	if target is power10.  (rs600_opt_masks): Allow -mpower10-fusion
> > 	in function attributes.  (address_ok_for_form): New function.

ok


> > 	* config/rs6000/rs6000.h: Add MASK_P10_FUSION.

> > 	* config/rs6000/rs6000.md (*ld_cmpi_cr0): New
> > 	define_insn_and_split.
> > 	(*lwa_cmpdi_cr0): New define_insn_and_split.
> > 	(*lwa_cmpwi_cr0): New define_insn_and_split.


> > 	* config/rs6000/rs6000.opt: Add -mpower10-fusion
> > 	and -mpower10-fusion-ld-cmpi.
> > ---
> > gcc/config/rs6000/predicates.md   |  5 +++
> > gcc/config/rs6000/rs6000-cpus.def |  6 ++-
> > gcc/config/rs6000/rs6000-protos.h |  2 +
> > gcc/config/rs6000/rs6000.c        | 34 ++++++++++++++++
> > gcc/config/rs6000/rs6000.h        |  1 +
> > gcc/config/rs6000/rs6000.md       | 68 +++++++++++++++++++++++++++++++
> > gcc/config/rs6000/rs6000.opt      |  8 ++++
> > 7 files changed, 123 insertions(+), 1 deletion(-)
> > 
> > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> > index 4c2fe7fa312..b75c1ddfb69 100644
> > --- a/gcc/config/rs6000/predicates.md
> > +++ b/gcc/config/rs6000/predicates.md
> > @@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand"
> >   (and (match_code "const_int")
> >        (match_test "IN_RANGE (INTVAL (op), 0, 1)")))
> > 
> > +;; Match op = -1, op = 0, or op = 1.
> > +(define_predicate "const_m1_to_1_operand"
> > +  (and (match_code "const_int")
> > +       (match_test "IN_RANGE (INTVAL (op), -1, 1)")))
> > +
> > ;; Match op = 0..3.
> > (define_predicate "const_0_to_3_operand"
> >   (and (match_code "const_int")

ok

> > diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
> > index 8d2c1ffd6cf..3e65289d8df 100644
> > --- a/gcc/config/rs6000/rs6000-cpus.def
> > +++ b/gcc/config/rs6000/rs6000-cpus.def
> > @@ -82,7 +82,9 @@
> > 
> > #define ISA_3_1_MASKS_SERVER	(ISA_3_0_MASKS_SERVER			\
> > 				 | OPTION_MASK_POWER10			\
> > -				 | OTHER_POWER10_MASKS)
> > +				 | OTHER_POWER10_MASKS			\
> > +				 | OPTION_MASK_P10_FUSION		\
> > +				 | OPTION_MASK_P10_FUSION_LD_CMPI)
> > 
> > /* Flags that need to be turned off if -mno-power9-vector.  */
> > #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
> > @@ -129,6 +131,8 @@
> > 				 | OPTION_MASK_FLOAT128_KEYWORD		\
> > 				 | OPTION_MASK_FPRND			\
> > 				 | OPTION_MASK_POWER10			\
> > +				 | OPTION_MASK_P10_FUSION		\
> > +				 | OPTION_MASK_P10_FUSION_LD_CMPI	\
> > 				 | OPTION_MASK_HTM			\
> > 				 | OPTION_MASK_ISEL			\
> > 				 | OPTION_MASK_MFCRF			\

ok

> > diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> > index 25fa5dd57cd..d8a344245e6 100644
> > --- a/gcc/config/rs6000/rs6000-protos.h
> > +++ b/gcc/config/rs6000/rs6000-protos.h
> > @@ -190,6 +190,8 @@ enum non_prefixed_form {
> > 
> > extern enum insn_form address_to_insn_form (rtx, machine_mode,
> > 					    enum non_prefixed_form);
> > +extern bool address_ok_for_form (rtx, machine_mode,
> > +				 enum non_prefixed_form);
> > extern bool prefixed_load_p (rtx_insn *);
> > extern bool prefixed_store_p (rtx_insn *);
> > extern bool prefixed_paddi_p (rtx_insn *);
> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> > index 4d528a39a37..b8de318a0bc 100644
> > --- a/gcc/config/rs6000/rs6000.c
> > +++ b/gcc/config/rs6000/rs6000.c
> > @@ -4406,6 +4406,12 @@ rs6000_option_override_internal (bool global_init_p)
> >   if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
> >     rs6000_isa_flags |= OPTION_MASK_MMA;
> > 
> > +  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
> > +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
> > +
> > +  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
> > +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
> > +
> >   /* Turn off vector pair/mma options on non-power10 systems.  */
> >   else if (!TARGET_POWER10 && TARGET_MMA)
> >     {
> > @@ -23391,6 +23397,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
> >   { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
> >   { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
> >   { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true  },
> > +  { "power10-fusion",		OPTION_MASK_P10_FUSION,		false, true  },
> >   { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
> >   { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
> >   { "prefixed",			OPTION_MASK_PREFIXED,		false, true  },
> > @@ -25482,6 +25489,33 @@ address_to_insn_form (rtx addr,
> >   return INSN_FORM_BAD;
> > }
> > 
> > +bool
> > +address_ok_for_form (rtx addr,
> > +		     machine_mode mode,
> > +		     enum non_prefixed_form non_prefixed_format)
> > +{
> > +  enum insn_form result_form;
> > +
> > +  result_form = address_to_insn_form (addr, mode, non_prefixed_format);
> > +
> > +  switch (non_prefixed_format)
> > +    {
> > +    case NON_PREFIXED_DS:
> > +      switch (result_form)
> > +	{
> > +	case INSN_FORM_DS:
> > +	case INSN_FORM_BASE_REG:
> > +	  return true;
> > +	default:
> > +	  break;
> > +	}
> > +      break;
> > +    default:
> > +      break;
> > +    }
> > +  return false;
> > +}
> > +
> > /* Helper function to see if we're potentially looking at lfs/stfs.
> >    - PARALLEL containing a SET and a CLOBBER
> >    - stfs:
> > diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> > index bbd8060e143..884452fc6d9 100644
> > --- a/gcc/config/rs6000/rs6000.h
> > +++ b/gcc/config/rs6000/rs6000.h
> > @@ -539,6 +539,7 @@ extern int rs6000_vector_align[];
> > #define MASK_UPDATE			OPTION_MASK_UPDATE
> > #define MASK_VSX			OPTION_MASK_VSX
> > #define MASK_POWER10			OPTION_MASK_POWER10
> > +#define MASK_P10_FUSION			OPTION_MASK_P10_FUSION
> > 
> > #ifndef IN_LIBGCC2
> > #define MASK_POWERPC64			OPTION_MASK_POWERPC64
> > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> > index dc060143104..bbcc6abe0f9 100644
> > --- a/gcc/config/rs6000/rs6000.md
> > +++ b/gcc/config/rs6000/rs6000.md
> > @@ -1896,6 +1896,74 @@ (define_insn_and_split "*add<mode>3_imm_dot2"
> >    (set_attr "dot" "yes")
> >    (set_attr "length" "4,8")])
> > 
> > +;; Define an insn for ld+cmpi so we can force it to use CR0 on p10
> > +;; immediate has to be -1/0/1
> > +(define_insn_and_split "*ld_cmpi_cr0"
> > +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> > +	(compare:CC (match_operand:DI 1 "memory_operand" "m")
> > +		    (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
> > +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> > +	(match_dup 1))
> > +   ]
> > +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> > +  "ld %0,%1\;cmpi 0,1,%0,%3"
> > +  "&& reload_completed
> > +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> > +       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
> > +  [(set (match_dup 0) (match_dup 1))
> > +   (set (match_dup 2)
> > +        (compare:CC (match_dup 0)
> > +		    (match_dup 3)))]
> > +  ""
> > +  [(set_attr "type" "load")
> > +   (set_attr "length" "8")])
> > +
> > +;; Define an insn for lwa+cmpdi so we can force it to use CR0 on p10
> > +;; immediate is -1/0/1
> > +(define_insn_and_split "*lwa_cmpdi_cr0"
> > +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> > +	(compare:CC (sign_extend:DI (match_operand:SI 1 "memory_operand" "m"))
> > +		    (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
> > +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> > +	(sign_extend:DI (match_dup 1)))
> > +   ]
> > +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> > +  "lwa %0,%1\;cmpdi %0,%3"
> > +  "&& reload_completed
> > +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> > +       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
> > +  [(set (match_dup 0)
> > +	(sign_extend:DI (match_dup 1)))
> > +   (set (match_dup 2)
> > +        (compare:CC (match_dup 0)
> > +		    (match_dup 3)))]
> > +  ""
> > +  [(set_attr "type" "load")
> > +   (set_attr "length" "8")])
> > +
> > +;; Define an insn for lwa+cmpwi so we can force it to use CR0 on p10
> > +;; immediate is -1/0/1
> > +(define_insn_and_split "*lwa_cmpwi_cr0"
> > +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> > +	(compare:CC (match_operand:SI 1 "memory_operand" "m")
> > +		    (match_operand:SI 3 "const_m1_to_1_operand" "n")))
> > +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> > +	(sign_extend:DI (match_dup 1)))
> > +   ]
> > +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> > +  "lwa %0,%1\;cmpwi %0,%3"
> > +  "&& reload_completed
> > +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> > +       || !address_ok_for_form (XEXP (operands[1],0), DImode, NON_PREFIXED_DS))"
> > +  [(set (match_dup 0)
> > +	(sign_extend:DI (match_dup 1)))
> > +   (set (match_dup 2)
> > +        (compare:CC (match_dup 0)
> > +		    (match_dup 3)))]
> > +  ""
> > +  [(set_attr "type" "load")
> > +   (set_attr "length" "8")])
> > +
> > ;; Split an add that we can't do in one insn into two insns, each of which
> > ;; does one 16-bit part.  This is used by combine.  Note that the low-order
> > ;; add should be last in case the result gets used in an address.

skimmed, no comments on the .md parts.

> > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> > index b2a70e88ca8..63457efb607 100644
> > --- a/gcc/config/rs6000/rs6000.opt
> > +++ b/gcc/config/rs6000/rs6000.opt
> > @@ -479,6 +479,14 @@ mpower8-vector
> > Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
> > Use vector and scalar instructions added in ISA 2.07.
> > 
> > +mpower10-fusion
> > +Target Report Mask(P10_FUSION) Var(rs6000_isa_flags)
> > +Fuse certain integer operations together for better performance on power10.
> > +
> > +mpower10-fusion-ld-cmpi
> > +Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags)
> > +Fuse a load with a compare immediate of -1/0/1 for better performance on power10.
> > +
> > mcrypto
> > Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
> > Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
> > -- 
> > 2.18.4
> > 
> 
> 

ok

thanks
-Will




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-11-05 22:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-26 21:44 [PATCH,rs6000] Add patterns for combine to support p10 fusion acsawdey
2020-11-04 18:12 ` Aaron Sawdey
2020-11-05 22:45   ` will schmidt

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).